Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
N
neoppod
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
1
Issues
1
List
Boards
Labels
Milestones
Merge Requests
2
Merge Requests
2
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
neoppod
Commits
081c502b
Commit
081c502b
authored
Mar 06, 2019
by
Julien Muchembled
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
client: new cache algorithm
parent
c84c48ee
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
171 additions
and
224 deletions
+171
-224
neo/client/cache.py
neo/client/cache.py
+170
-223
neo/tests/threaded/test.py
neo/tests/threaded/test.py
+1
-1
No files found.
neo/client/cache.py
View file @
081c502b
...
@@ -15,26 +15,46 @@
...
@@ -15,26 +15,46 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from
__future__
import
division
from
__future__
import
division
import
math
from
BTrees.LOBTree
import
LOBTree
from
bisect
import
insort
from
gc
import
get_referents
from
struct
import
Struct
from
sys
import
getsizeof
# Converters used to reinterpret the 8 bytes of a C double as a C long long
# and back ('d' and 'q' struct formats). Each function is the bound
# pack/unpack method of a precompiled Struct; unpack returns a 1-tuple.
s = Struct('d')
pack_double, unpack_double = s.pack, s.unpack
s = Struct('q')
pack_long, unpack_long = s.pack, s.unpack
# The temporary name is not part of the module namespace.
del s
def internalSizeOfBTree(x):
    """Return the cumulated ``getsizeof`` of `x` and its internal nodes.

    Starting from `x`, the object graph is walked with ``gc.get_referents``
    but only through objects whose type is defined in the same module as
    ``type(x)``; anything else (stored values, builtins, ...) is neither
    counted nor traversed. Every matching object is counted exactly once,
    even when it is reachable through several paths or through a cycle.
    """
    module = type(x).__module__
    # Objects are tracked by identity and marked as soon as they are queued.
    # Marking them only when popped would let a node referenced by two
    # pending parents be queued (and counted) twice.
    seen = {id(x)}
    left = [x]
    size = 0
    while left:
        node = left.pop()
        size += getsizeof(node)
        for child in get_referents(node):
            if type(child).__module__ == module and id(child) not in seen:
                seen.add(id(child))
                left.append(child)
    return size
class
CacheItem
(
object
):
class
CacheItem
(
object
):
__slots__
=
(
'oid'
,
'tid'
,
'next_tid'
,
'data'
,
__slots__
=
'oid'
,
'tid'
,
'next_tid'
,
'data'
,
'counter'
,
'expire'
'counter'
,
'level'
,
'expire'
,
'prev'
,
'next'
)
def
__repr__
(
self
):
def
__repr__
(
self
):
s
=
''
s
=
''
for
attr
in
self
.
__slots__
:
for
attr
in
self
.
__slots__
:
try
:
try
:
value
=
getattr
(
self
,
attr
)
value
=
getattr
(
self
,
attr
)
if
value
:
if
attr
==
'data'
:
if
attr
in
(
'prev'
,
'next'
):
s
+=
' len(%s)=%s'
%
(
attr
,
len
(
value
))
s
+=
' %s=<...>'
%
attr
continue
continue
if
attr
==
'expire'
:
elif
attr
==
'data'
:
value
=
unpack_double
(
pack_long
(
value
))[
0
]
value
=
'...'
s
+=
' %s=%r'
%
(
attr
,
value
)
s
+=
' %s=%r'
%
(
attr
,
value
)
except
AttributeError
:
except
AttributeError
:
pass
pass
...
@@ -44,261 +64,186 @@ class CacheItem(object):
...
@@ -44,261 +64,186 @@ class CacheItem(object):
return
self
.
tid
<
other
.
tid
return
self
.
tid
<
other
.
tid
class
ClientCache
(
object
):
class
ClientCache
(
object
):
"""In-memory pickle cache based on
Multi-Queue
cache algorithm
"""In-memory pickle cache based on
LFRU
cache algorithm
Multi-Queue algorithm for Second Level Buffer Caches:
This Least Frequent Recently Used implementation is adapted to handle
https://www.usenix.org/event/usenix01/full_papers/zhou/zhou_html/index.html
records of different sizes. This is possible thanks to a B+Tree: the use
of such a complex structure is quite unusual for a cache
but we use a C implementation that's relatively fast compared to the
cost of a cache miss.
Quick description:
This algorithm adapts well regardless of its maximum allowed size,
- There are multiple "regular" queues, plus a history queue
without any tweak.
- The queue to store an object in depends on its access frequency
- The queue an object is in defines its lifespan (higher-index queue eq.
longer lifespan)
-> The more often an object is accessed, the higher lifespan it will
have
- Upon cache or history hit, object frequency is increased and object
might get moved to longer-lived queue
- Each access "ages" objects in cache, and an aging object is moved to
shorter-lived queue as it ages without being accessed, or in the
history queue if it's really too old.
- The history queue only contains items with counter > 0
"""
"""
__slots__
=
(
'max_size'
,
'_life_time'
,
'_max_history_size'
,
__slots__
=
(
'max_size'
,
'_oid_dict'
,
'_size'
,
'_added'
,
'_items'
,
'_queue_list'
,
'_oid_dict'
,
'_time'
,
'_size'
,
'_history_size'
,
'_nhit'
,
'_nmiss'
)
'_nhit'
,
'_nmiss'
)
def
__init__
(
self
,
life_time
=
10000
,
max_history_size
=
100000
,
def
__init__
(
self
,
max_size
=
20
*
1024
*
1024
):
max_size
=
20
*
1024
*
1024
):
self
.
_life_time
=
life_time
self
.
_max_history_size
=
max_history_size
self
.
max_size
=
max_size
self
.
max_size
=
max_size
self
.
clear
()
self
.
clear
()
def
clear
(
self
):
def
clear
(
self
):
"""Reset cache"""
"""Reset cache"""
self
.
_queue_list
=
[
None
]
# first is history
self
.
_oid_dict
=
{}
self
.
_oid_dict
=
{}
self
.
_time
=
0
self
.
_size
=
self
.
_nhit
=
self
.
_nmiss
=
0
self
.
_size
=
0
# Make sure to never produce negative keys, else
self
.
_history_size
=
0
# we could not manipulate them when encoded as integers.
self
.
_nhit
=
self
.
_nmiss
=
0
self
.
_added
=
self
.
max_size
self
.
_items
=
LOBTree
()
def
__repr__
(
self
):
def
__repr__
(
self
):
nload
=
self
.
_nhit
+
self
.
_nmiss
nload
=
self
.
_nhit
+
self
.
_nmiss
return
(
"<%s #loads=%s #oids=%s size=%s
time=%s queue_length=%r
"
return
(
"<%s #loads=%s #oids=%s size=%s
#items=%s
"
"
(life_time=%s max_history_size=%s
max_size=%s)>"
)
%
(
"
btree_overhead=%s (
max_size=%s)>"
)
%
(
self
.
__class__
.
__name__
,
self
.
__class__
.
__name__
,
nload
and
'%s (%.3g%% hit)'
%
(
nload
,
100
*
self
.
_nhit
/
nload
),
nload
and
'%s (%.3g%% hit)'
%
(
nload
,
100
*
self
.
_nhit
/
nload
),
len
(
self
.
_oid_dict
),
self
.
_size
,
self
.
_time
,
len
(
self
.
_oid_dict
),
self
.
_size
,
len
(
self
.
_items
),
[
self
.
_history_size
]
+
[
internalSizeOfBTree
(
self
.
_items
),
sum
(
1
for
_
in
self
.
_iterQueue
(
level
))
self
.
max_size
)
for
level
in
xrange
(
1
,
len
(
self
.
_queue_list
))],
self
.
_life_time
,
self
.
_max_history_size
,
self
.
max_size
)
def _iterQueue(self, level):
    """Yield the items linked in the queue of the given level (debugging).

    Nothing is yielded when `level` is not a valid index of the queue list.
    """
    queues = self._queue_list
    if len(queues) <= level:
        return
    # The queue is walked without taking any lock. If another thread
    # modifies it meanwhile, the result may be wrong, but requiring strictly
    # increasing `expire` values and a matching level guarantees that the
    # walk terminates. A lock would be needed for accurate statistics.
    last_expire = 0
    node = queues[level]
    while node and node.level == level and last_expire < node.expire:
        yield node
        last_expire = node.expire
        node = node.next
def _remove_from_oid_dict(self, item):
    """Unregister `item` from the per-oid index.

    The item is removed from the list stored under its oid; when that list
    becomes empty, the oid entry itself is deleted.
    """
    index = self._oid_dict
    oid = item.oid
    revisions = index[oid]
    revisions.remove(item)
    if len(revisions) == 0:
        del index[oid]
def _add(self, item):
    """Append `item` at the tail of the queue matching ``item.level``.

    Queues are circular doubly-linked lists whose head is stored in
    ``self._queue_list[level]``. For a non-zero level, the item gets a new
    expiration time. Level 0 is the history queue: the item's data is
    released and, if the history grows beyond its maximum size, the previous
    head of the history is dropped.
    """
    queues = self._queue_list
    level = item.level
    try:
        head = queues[level]
    except IndexError:
        # First item ever put at this level: the list of queues can only
        # grow by one slot at a time.
        assert len(queues) == level
        queues.append(item)
        item.prev = item.next = item
    else:
        if not head:
            # Existing but empty queue: the item is its own neighbour.
            queues[level] = item
            item.prev = item.next = item
        else:
            # Insert between the current tail and the head.
            tail = head.prev
            item.prev = tail
            tail.next = item
            head.prev = item
            item.next = head
    if not level:
        self._empty(item)
        self._history_size += 1
        if self._history_size > self._max_history_size:
            self._remove(head)
            self._remove_from_oid_dict(head)
        return
    item.expire = self._time + self._life_time
def _empty(self, item):
    """Release the data held by `item` and update the cache size."""
    freed = len(item.data)
    item.data = None
    self._size -= freed
def _remove(self, item):
    """Unlink `item` from the queue of its current level.

    Nothing is done (and None is returned) if the item has no level.
    Otherwise, an item of a non-zero level gets its level decremented,
    whereas removing from level 0 (history) decrements the history size.
    The head of the queue is updated if it was `item`.

    Return the item that followed `item` in its queue, or None if the queue
    is now empty.
    """
    level = item.level
    if level is None:
        return
    if level:
        item.level = level - 1
    else:
        self._history_size -= 1
    successor = item.next
    if successor is item:
        # Sole element of a circular queue: the queue becomes empty.
        successor = None
        self._queue_list[level] = None
    else:
        predecessor = item.prev
        predecessor.next = successor
        successor.prev = predecessor
        if self._queue_list[level] is item:
            self._queue_list[level] = successor
    return successor
def _fetched(self, item, _log=math.log):
    """Account for an access to `item` and age the cache by one tick.

    The item is unlinked, its counter is incremented and it is queued again
    at the level derived from the base-2 logarithm of the new counter. Then
    the clock advances and the first expired queue head, if any, is removed
    from its queue: it is queued again if it still has a non-zero level or
    counter, else its data is released and it is forgotten.
    """
    self._remove(item)
    hits = item.counter + 1
    item.counter = hits
    # XXX It might be better to also adjust the level according to the
    # object size, for example by multiplying the logarithm by
    # (1.01 - len(item.data) / self.max_size).
    item.level = 1 + int(_log(hits, 2))
    self._add(item)

    now = self._time + 1
    self._time = now
    # Index 0 is the history queue, which is not subject to expiration here.
    for head in self._queue_list[1:]:
        if not head or head.expire >= now:
            continue
        self._remove(head)
        if head.level or head.counter:
            self._add(head)
        else:
            self._empty(head)
            self._remove_from_oid_dict(head)
        # At most one item is aged per access.
        break
def
_load
(
self
,
oid
,
before_tid
=
None
):
def
_load
(
self
,
oid
,
before_tid
=
None
):
item_list
=
self
.
_oid_dict
.
get
(
oid
)
item_list
=
self
.
_oid_dict
.
get
(
oid
)
if
item_list
:
if
item_list
:
if
before_tid
:
if
before_tid
:
for
item
in
reversed
(
item_list
)
:
for
item
in
item_list
:
if
item
.
tid
<
before_tid
:
if
item
.
tid
<
before_tid
:
next_tid
=
item
.
next_tid
next_tid
=
item
.
next_tid
if
next_tid
and
next_tid
<
before_tid
:
if
next_tid
and
next_tid
<
before_tid
:
break
break
return
item
return
item
else
:
else
:
item
=
item_list
[
-
1
]
item
=
item_list
[
0
]
if
not
item
.
next_tid
:
if
not
item
.
next_tid
:
return
item
return
item
def
load
(
self
,
oid
,
before_tid
=
None
):
def
load
(
self
,
oid
,
before_tid
):
"""Return a revision of oid that was current before given tid"""
"""Return a revision of oid that was current before given tid"""
item
=
self
.
_load
(
oid
,
before_tid
)
item
=
self
.
_load
(
oid
,
before_tid
)
if
item
:
if
item
:
d
ata
=
item
.
data
d
el
self
.
_items
[
item
.
expire
]
i
f
data
is
not
None
:
i
tem
.
counter
+=
1
self
.
_nhit
+=
1
self
.
_add
(
item
)
self
.
_fetched
(
item
)
self
.
_nhit
+=
1
return
data
,
item
.
tid
,
item
.
next_tid
return
item
.
data
,
item
.
tid
,
item
.
next_tid
self
.
_nmiss
+=
1
self
.
_nmiss
+=
1
def _forget(self, item):
    """Remove `item` from the cache entirely.

    The item is dropped from the per-oid list (and the oid entry is deleted
    if that list becomes empty), the cache size is reduced by the length of
    the item's data, and the entry stored under ``item.expire`` is deleted
    from ``self._items``.
    """
    index = self._oid_dict
    oid = item.oid
    revisions = index[oid]
    revisions.remove(item)
    if len(revisions) == 0:
        del index[oid]
    self._size -= len(item.data)
    expire = item.expire
    del self._items[expire]
def _add(self, item):
    """Index `item` in ``self._items`` under a newly computed key.

    The key is stored in ``item.expire`` and ``self._added`` is increased by
    the length of the item's data.

    Rationale for the key. The initial idea was:
        (added - size) * item.counter
    but after running for a long time, this tends to degenerate:
    - size becomes more and more negligible over time;
    - the most often accessed objects become impossible to remove, making
      the cache too slow to adapt after a change of workload;
    - 64 bits is not enough.
    This is solved by using doubles and the following formula:
        min_key - size + (added - min_key) * item.counter
    BTrees has no optimized class for doubles, so they are encoded as
    integers, which preserves their order as long as they are positive
    (hence an extra tweak to avoid negative numbers in some rare cases).
    It also makes it easier to get the next double (+1 instead of
    libm.nextafter). The downside is that converting between double and
    long is a bit expensive in Python.
    """
    tree = self._items
    total_added = self._added
    try:
        lowest = tree.minKey()
    except ValueError:
        # Raised by minKey: no smallest key to use, start from `added`.
        base = total_added
    else:
        # Most of the time, the smallest key is smaller than `added`. In
        # the very rare case it isn't, make sure to produce a positive key.
        base = min(total_added, unpack_double(pack_long(lowest))[0])
    nbytes = len(item.data)
    score = base - nbytes + (total_added - base) * item.counter
    key = unpack_long(pack_double(score))[0]
    # Keys must be unique: skip over the consecutive keys already in use.
    for used in tree.iterkeys(key):
        if used != key:
            break
        key += 1
    self._added = total_added + nbytes
    item.expire = key
    tree[key] = item
def
store
(
self
,
oid
,
data
,
tid
,
next_tid
):
def
store
(
self
,
oid
,
data
,
tid
,
next_tid
):
"""Store a new data record in the cache"""
"""Store a new data record in the cache"""
size
=
len
(
data
)
size
=
len
(
data
)
max_size
=
self
.
max_size
max_size
=
self
.
max_size
if
size
<
max_size
:
if
size
<
max_size
:
item
=
self
.
_load
(
oid
,
next_tid
)
i
=
0
if
item
:
try
:
# We don't handle late invalidations for cached oids, because
items
=
self
.
_oid_dict
[
oid
]
# the caller is not supposed to explicitly ask for tids after
except
KeyError
:
# app.last_tid (and the cache should be empty when app.last_tid
items
=
self
.
_oid_dict
[
oid
]
=
[]
# is still None).
counter
=
1
assert
item
.
tid
==
tid
,
(
item
,
tid
)
if
item
.
level
:
# already stored
assert
item
.
next_tid
==
next_tid
and
item
.
data
==
data
return
assert
not
item
.
data
# Possible case of late invalidation.
item
.
next_tid
=
next_tid
else
:
else
:
item
=
CacheItem
()
for
item
in
items
:
item
.
oid
=
oid
if
item
.
tid
<
tid
:
item
.
tid
=
tid
assert
None
is
not
item
.
next_tid
<=
tid
item
.
next_tid
=
next_tid
break
item
.
counter
=
0
if
item
.
tid
==
tid
:
item
.
level
=
None
# We don't handle late invalidations for cached oids,
try
:
# because the caller is not supposed to explicitly ask
item_list
=
self
.
_oid_dict
[
oid
]
# for tids after app.last_tid (and the cache should be
except
KeyError
:
# empty when app.last_tid is still None).
self
.
_oid_dict
[
oid
]
=
[
item
]
assert
item
.
next_tid
==
next_tid
and
item
.
data
==
data
return
i
+=
1
if
next_tid
:
counter
=
1
else
:
else
:
if
next_tid
:
counter
=
item
.
counter
insort
(
item_list
,
item
)
if
counter
!=
1
:
else
:
del
self
.
_items
[
item
.
expire
]
prev
=
item_list
[
-
1
]
item
.
counter
=
1
assert
prev
.
next_tid
<=
tid
,
(
prev
,
item
)
self
.
_add
(
item
)
item
.
counter
=
prev
.
counter
item
=
CacheItem
()
if
prev
.
level
:
item
.
oid
=
oid
prev
.
counter
=
0
item
.
tid
=
tid
if
prev
.
level
>
1
:
item
.
next_tid
=
next_tid
self
.
_fetched
(
prev
)
item_list
.
append
(
item
)
else
:
self
.
_remove
(
prev
)
item_list
[
-
1
]
=
item
item
.
data
=
data
item
.
data
=
data
self
.
_fetched
(
item
)
item
.
counter
=
counter
items
.
insert
(
i
,
item
)
self
.
_size
+=
size
self
.
_size
+=
size
if
max_size
<
self
.
_size
:
self
.
_add
(
item
)
for
head
in
self
.
_queue_list
[
1
:]:
while
max_size
<
self
.
_size
:
while
head
:
items
=
self
.
_items
next
=
self
.
_remove
(
head
)
self
.
_forget
(
items
[
items
.
minKey
()])
if
head
.
counter
:
head
.
level
=
0
self
.
_add
(
head
)
else
:
self
.
_empty
(
head
)
self
.
_remove_from_oid_dict
(
head
)
if
self
.
_size
<=
max_size
:
return
head
=
next
def
invalidate
(
self
,
oid
,
tid
):
def
invalidate
(
self
,
oid
,
tid
):
"""Mark data record as being valid only up to given tid"""
"""Mark data record as being valid only up to given tid"""
try
:
items
=
self
.
_oid_dict
.
get
(
oid
)
item
=
self
.
_oid_dict
[
oid
][
-
1
]
if
items
:
except
KeyError
:
item
=
items
[
0
]
pass
else
:
if
item
.
next_tid
is
None
:
if
item
.
next_tid
is
None
:
item
.
next_tid
=
tid
item
.
next_tid
=
tid
else
:
else
:
assert
item
.
next_tid
<=
tid
,
(
item
,
oid
,
tid
)
assert
item
.
next_tid
<=
tid
,
(
item
,
oid
,
tid
)
def
clear_current
(
self
):
def
clear_current
(
self
):
for
oid
,
item
_list
in
self
.
_oid_dict
.
items
():
for
oid
,
item
s
in
self
.
_oid_dict
.
items
():
item
=
item
_list
[
-
1
]
item
=
item
s
[
0
]
if
item
.
next_tid
is
None
:
if
item
.
next_tid
is
None
:
if
item
.
level
:
self
.
_forget
(
item
)
self
.
_empty
(
item
)
self
.
_remove
(
item
)
del
item_list
[
-
1
]
# We don't preserve statistics of removed items. This could be
# done easily when previous versions are cached, by copying
# counters, but it would not be fair for other oids, so it's
# probably not worth it.
if
not
item_list
:
del
self
.
_oid_dict
[
oid
]
def
test
(
self
):
def
test
(
self
):
orig_add
=
ClientCache
.
_add
def
_add
(
cache
,
item
):
orig_add
(
cache
,
item
)
self
.
assertLessEqual
(
0
,
cache
.
_items
.
minKey
())
ClientCache
.
_add
=
_add
cache
=
ClientCache
()
cache
=
ClientCache
()
repr
(
cache
)
repr
(
cache
)
self
.
assertEqual
(
cache
.
load
(
1
,
10
),
None
)
self
.
assertEqual
(
cache
.
load
(
1
,
10
),
None
)
...
@@ -324,24 +269,26 @@ def test(self):
...
@@ -324,24 +269,26 @@ def test(self):
self
.
assertEqual
(
cache
.
load
(
1
,
20
),
(
'15'
,
15
,
20
))
self
.
assertEqual
(
cache
.
load
(
1
,
20
),
(
'15'
,
15
,
20
))
cache
.
store
(
1
,
'10'
,
10
,
15
)
cache
.
store
(
1
,
'10'
,
10
,
15
)
cache
.
store
(
1
,
'20'
,
20
,
21
)
cache
.
store
(
1
,
'20'
,
20
,
21
)
self
.
assertEqual
([
5
,
10
,
15
,
20
],
[
x
.
tid
for
x
in
cache
.
_oid_dict
[
1
]])
self
.
assertEqual
([
20
,
15
,
10
,
5
],
[
x
.
tid
for
x
in
cache
.
_oid_dict
[
1
]])
self
.
assertRaises
(
AssertionError
,
cache
.
store
,
1
,
'20'
,
20
,
None
)
self
.
assertRaises
(
AssertionError
,
cache
.
store
,
1
,
'20'
,
20
,
None
)
repr
(
cache
)
repr
(
cache
)
map
(
repr
,
cache
.
_queue_list
)
cache
=
ClientCache
(
10
)
# Test late invalidations.
data1
=
"x"
,
1
,
None
cache
.
clear
()
cache
.
store
(
1
,
"x"
,
1
,
None
)
cache
.
store
(
1
,
'10*'
,
10
,
None
)
repr
(
*
cache
.
_oid_dict
[
1
])
cache
.
max_size
=
cache
.
_size
data
=
"xxxxx"
,
1
,
None
cache
.
store
(
2
,
'10'
,
10
,
15
)
cache
.
store
(
2
,
*
data
)
self
.
assertEqual
(
cache
.
_queue_list
[
0
].
oid
,
1
)
cache
.
store
(
3
,
*
data
)
cache
.
store
(
2
,
'15'
,
15
,
None
)
self
.
assertEqual
(
cache
.
load
(
1
,
None
),
data1
)
self
.
assertEqual
(
cache
.
_queue_list
[
2
].
oid
,
2
)
self
.
assertEqual
(
cache
.
load
(
2
,
None
),
None
)
# bigger records removed faster
data
=
'10'
,
10
,
15
self
.
assertEqual
(
cache
.
load
(
3
,
None
),
data
)
cache
.
store
(
1
,
*
data
)
self
.
assertEqual
(
cache
.
_size
,
6
)
self
.
assertEqual
(
cache
.
load
(
1
,
15
),
data
)
self
.
assertEqual
(
1
,
cache
.
_history_size
)
cache
.
clear_current
()
cache
.
clear_current
()
self
.
assertEqual
(
0
,
cache
.
_history_size
)
for
oid
in
0
,
1
:
cache
.
store
(
oid
,
'x'
,
1
,
None
)
cache
.
load
(
oid
,
None
)
cache
.
load
(
oid
,
None
)
cache
.
load
(
0
,
None
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
import
unittest
import
unittest
...
...
neo/tests/threaded/test.py
View file @
081c502b
...
@@ -931,7 +931,7 @@ class Test(NEOThreadedTest):
...
@@ -931,7 +931,7 @@ class Test(NEOThreadedTest):
ll
()
ll
()
x2
.
_p_deactivate
()
x2
.
_p_deactivate
()
# Remove last version of x from cache
# Remove last version of x from cache
cache
.
_
remove
(
cache
.
_oid_dict
[
x2
.
_p_oid
].
pop
()
)
cache
.
_
forget
(
cache
.
_oid_dict
[
x2
.
_p_oid
][
0
]
)
with
ll
,
Patch
(
cluster
.
client
,
_loadFromStorage
=
break_after
):
with
ll
,
Patch
(
cluster
.
client
,
_loadFromStorage
=
break_after
):
t
=
self
.
newThread
(
x2
.
_p_activate
)
t
=
self
.
newThread
(
x2
.
_p_activate
)
ll
()
ll
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment