Commit ca4a0592 authored by Levin Zimmermann

erp5_wendelin/DataArray += .gcOrphanedArrays

This new method helps to free storage space of a Wendelin instance
in case 'DataArray.setArray(None)' has been called before.

Previously assigned ZBigArrays are orphaned (no longer related
to any other object). In order to free some storage space, we can
therefore remove them from the database by explicitly garbage
collecting them.
parent 4b89606b
Pipeline #38248 passed with stage
in 0 seconds
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
############################################################################## ##############################################################################
# #
# Copyright (c) 2015 Nexedi SA and Contributors. All Rights Reserved. # Copyright (c) 2015-2024 Nexedi SA and Contributors. All Rights Reserved.
# Ivan Tyagov <ivan@nexedi.com> # Ivan Tyagov <ivan@nexedi.com>
# Levin Zimmermann <levin.zimmermann@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your # it under the terms of the GNU General Public License version 3, or (at your
...@@ -153,6 +154,30 @@ class DataArray(BigFile): ...@@ -153,6 +154,30 @@ class DataArray(BigFile):
# return default view # return default view
return self.view() return self.view()
def gcOrphanedArrays(self):
  """
  Garbage collect the ZBigArrays orphaned by earlier 'setArray(None)' calls.

  If 'setArray(None)' has been called, the previously assigned
  ZBigArrays are orphaned (no longer related to any other object).
  In order to free some storage space, we can therefore remove
  them from the database by explicitly garbage collecting them.

  The saved revisions of this object are visited one after another via
  'storage.loadBefore'. Whenever a revision references an array while the
  revision visited just before it (initially: the current state) has
  none, that array is passed to 'Base_deleteZBigArray'.

  Takes no arguments and returns None.

  Beware: calling this breaks ZODB undo functionality.

  NOTE(review): only the directly following revision is looked at, so an
  array that was detached and later attached again would still be
  collected - confirm callers never re-attach an old array.
  """
  conn = self._p_jar
  storage = conn.db().storage
  # True while the revision visited last (initially the current state)
  # holds no array.
  is_orphan = self.getArray() is None
  oid, tid = self._p_oid, self._p_serial
  while True:
    # Revision of this object saved before `tid`; a falsy result means
    # there is no older revision left to inspect.
    data = storage.loadBefore(oid, tid)
    if not data:
      break
    tid = data[1]  # continue the walk from that revision
    state = conn.oldstate(self, tid)
    array = state.get('array')
    # Test identity against None rather than truthiness, consistent with
    # the 'is None' checks around it: an array object that evaluates as
    # false (e.g. an empty one) must not be skipped.
    if is_orphan and array is not None:  # GC current ZBigArray
      self.Base_deleteZBigArray(array)
    is_orphan = array is None
# FIXME this duplicates a lot of code from ERP5's BigFile # FIXME this duplicates a lot of code from ERP5's BigFile
# -> TODO reuse BigFile streaming capability without copying its code # -> TODO reuse BigFile streaming capability without copying its code
def _range_request_handler(self, REQUEST, RESPONSE): def _range_request_handler(self, REQUEST, RESPONSE):
......
...@@ -927,3 +927,29 @@ result = [x for x in data_bucket_stream.getBucketIndexKeySequenceByIndex()] ...@@ -927,3 +927,29 @@ result = [x for x in data_bucket_stream.getBucketIndexKeySequenceByIndex()]
assertNotInDB(arr.zfile) assertNotInDB(arr.zfile)
assertNotInDB(arr.zfile.blktab) assertNotInDB(arr.zfile.blktab)
assertNotInDB(blk) assertNotInDB(blk)
@func
def test_20_gcOrphanedArrays(self):
  """'gcOrphanedArrays' must remove from the database every ZBigArray that
  a 'Data Array' orphaned through 'setArray(None)'."""
  site = self.portal
  database = site._p_jar.db()
  data_array = site.data_array_module.newContent(portal_type="Data Array")
  self.tic()

  # Repeatedly attach a fresh array and detach it again, remembering each
  # array that got detached.
  orphaned_arrays = []
  for _ in range(10):
    data_array.initArray((1,), int)
    orphaned_arrays.append(data_array.getArray())
    data_array.setArray(None)
    self.tic()

  @func
  def check_each_orphan(check):
    # Use a separate connection, closed when this helper returns.
    fresh_conn = database.open()
    defer(fresh_conn.close)
    for orphan in orphaned_arrays:
      check(fresh_conn, orphan._p_oid)

  def expect_present(conn, oid):
    conn.get(oid)

  def expect_missing(conn, oid):
    self.assertRaises(POSKeyError, conn.get, oid)

  # before GC, orphans are still in DB
  check_each_orphan(expect_present)
  data_array.gcOrphanedArrays()
  check_each_orphan(expect_missing)
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment