Commit 66e106ec authored by Kirill Smelkov

.

parent 10a7981d
...@@ -116,15 +116,29 @@ ...@@ -116,15 +116,29 @@
// Wcfs client locking organization // Wcfs client locking organization
// //
// XXX locking -> explain atMu + slaves and refer to "Locking" in wcfs.go // Wcfs client needs to synchronize regular user threads vs each other and vs
// pinner. A major lock Conn.atMu protects changes to Conn's view of
// the database. Whenever atMu.W is taken - Conn.at is changing (Conn.resync),
// and conversely, whenever atMu.R is taken - Conn.at is stable (roughly speaking
// Conn.resync is not running).
// //
// Conn.atMu > Conn.filehMu > FileH.mmapMu // Similarly to wcfs.go(*) several locks that protect internal data structures
// are minor to Conn.atMu - they need to be taken only under atMu.R (to
// synchronize e.g. multiple fileh open running simultaneously), but do not
// need to be taken at all if atMu.W is taken. In data structures such locks
// are noted as follows
//
// sync::Mutex xMu; // atMu.W | atMu.R + xMu
//
// After atMu, Conn.filehMu protects registry of opened file handles
// (Conn._filehTab), and FileH.mmapMu protects registry of created Mappings
// (FileH.mmaps) and FileH.pinned.
// //
// Several locks are RWMutex instead of just Mutex not only to allow more // Several locks are RWMutex instead of just Mutex not only to allow more
// concurrency, but, in the first place for correctness: pinner thread being // concurrency, but, in the first place for correctness: pinner thread being
// core element in handling WCFS isolation protocol, is effectively invoked // core element in handling WCFS isolation protocol, is effectively invoked
// synchronously from other threads via messages coming through wcfs server. // synchronously from other threads via messages coming through wcfs server.
// For example Conn.resync sends watch request to wcfs and waits for the // For example Conn.resync sends watch request to wcfs server and waits for the
// answer. Wcfs server, in turn, might send corresponding pin messages to the // answer. Wcfs server, in turn, might send corresponding pin messages to the
// pinner and _wait_ for the answer before answering to resync: // pinner and _wait_ for the answer before answering to resync:
// //
...@@ -138,24 +152,36 @@ ...@@ -138,24 +152,36 @@
// client process // client process
// //
// This creates the necessity to use RWMutex for locks that pinner and other // This creates the necessity to use RWMutex for locks that pinner and other
// parts of the code could be using at the same time in synchronous scenarious // parts of the code could be using at the same time in synchronous scenarios
// similar to the above. These locks are: // similar to the above. These locks are:
// //
// - Conn.atMu // - Conn.atMu
// - Conn.filehMu // - Conn.filehMu
// //
// XXX pinner takes the following locks (XXX recheck) // Note that FileH.mmapMu is a regular - not RW - mutex, since nothing in wcfs
// client calls into wcfs server via watchlink with mmapMu held.
// //
// - wconn.filehMu.W // To synchronize with the virtmem layer, wcfs client takes and releases the big
// - wconn.filehMu.R // virtmem lock around places that touch virtmem (calls to virt_lock and
// virt_unlock). Also, virtmem calls several wcfs client entrypoints with the
// virtmem lock already taken. Thus, to avoid AB-BA style deadlocks, wcfs
// client needs to take the virtmem lock as the first lock whenever it needs to
// take both the virtmem lock and another lock - e.g. atMu(%).
//
// The ordering of locks is:
//
// virt_lock > Conn.atMu > Conn.filehMu > FileH.mmapMu
//
// The pinner takes the following locks:
// //
// - virt_lock // - virt_lock
// - wconn.atMu.R // - wconn.atMu.R
// - wconn.filehMu.R // - wconn.filehMu.R
// - fileh.mmapMu (R:.mmaps W:.pinned) // - fileh.mmapMu (to read .mmaps + write .pinned)
// //
// //
// XXX note on virt_lock in pinner and deadlocks. // (*) see "Wcfs locking organization" in wcfs.go
// (%) see related comment in Conn.__pin1 for details.
#include "wcfs_misc.h" #include "wcfs_misc.h"
...@@ -209,7 +235,7 @@ static error mmap_into_ro(void *addr, size_t size, os::File f, off_t offset); ...@@ -209,7 +235,7 @@ static error mmap_into_ro(void *addr, size_t size, os::File f, off_t offset);
// //
// _headWait is currently needed, because client stats wcfs/head/f to get f // _headWait is currently needed, because client stats wcfs/head/f to get f
// size assuming that f size only ↑. The assumption is not generally valid // size assuming that f size only ↑. The assumption is not generally valid
// (e.g. f might be truncated = hole puched for block at tail), but holds true // (e.g. f might be truncated = hole punched for block at tail), but holds true
// for now. However to get correct results wcfs/head/f has to be statt'ed // for now. However to get correct results wcfs/head/f has to be statt'ed
// _after_ wcfs view of the database becomes ≥ wconn.at. // _after_ wcfs view of the database becomes ≥ wconn.at.
// //
...@@ -218,7 +244,7 @@ static error mmap_into_ro(void *addr, size_t size, os::File f, off_t offset); ...@@ -218,7 +244,7 @@ static error mmap_into_ro(void *addr, size_t size, os::File f, off_t offset);
// file size @at will be returned by wcfs itself, which will also work if // file size @at will be returned by wcfs itself, which will also work if
// wcfs/head/f size is changed arbitrarily. // wcfs/head/f size is changed arbitrarily.
// //
// (*) equivalient might be to send something like "pin #<bsize>.. Z" (pin // (*) equivalent might be to send something like "pin #<bsize>.. Z" (pin
// blocks bsize till ∞ to zeros). // blocks bsize till ∞ to zeros).
error WCFS::_headWait(zodb::Tid at) { error WCFS::_headWait(zodb::Tid at) {
WCFS *wc = this; WCFS *wc = this;
...@@ -578,7 +604,7 @@ error _Conn::__pin1(PinReq *req) { ...@@ -578,7 +604,7 @@ error _Conn::__pin1(PinReq *req) {
// resync resyncs connection and its file mappings onto different database view. // resync resyncs connection and its file mappings onto different database view.
// //
// bigfile/_file_zob.pyx arranges to call Conn.resync at transaction boundaries // bigfile/_file_zodb.pyx arranges to call Conn.resync at transaction boundaries
// to keep Conn view in sync with updated zconn database view. // to keep Conn view in sync with updated zconn database view.
error _Conn::resync(zodb::Tid at) { error _Conn::resync(zodb::Tid at) {
_Conn& wconn = *this; _Conn& wconn = *this;
...@@ -821,8 +847,8 @@ retry: ...@@ -821,8 +847,8 @@ retry:
f->_openReady.close(); f->_openReady.close();
}); });
// do the actuall open. // do the actual open.
// we hold only wconn.atMu.R, but niether wconn.filehMu, nor f.mmapMu . // we hold only wconn.atMu.R, but neither wconn.filehMu, nor f.mmapMu .
f->_openErr = f->_open(); f->_openErr = f->_open();
if (f->_openErr != nil) if (f->_openErr != nil)
return make_pair(nil, E(f->_openErr)); return make_pair(nil, E(f->_openErr));
...@@ -1089,7 +1115,7 @@ pair<Mapping, error> _FileH::mmap(int64_t blk_start, int64_t blk_len, VMA *vma) ...@@ -1089,7 +1115,7 @@ pair<Mapping, error> _FileH::mmap(int64_t blk_start, int64_t blk_len, VMA *vma)
return make_pair(mmap, nil); return make_pair(mmap, nil);
} }
// __remmapEfault remmaps Mapping memory to cause SIGSEGV on access. // __remmapAsEfault remmaps Mapping memory to cause SIGSEGV on access.
// //
// It is used on FileH shutdown to turn all fileh mappings into incorrect ones, // It is used on FileH shutdown to turn all fileh mappings into incorrect ones,
// because after fileh is down, it is not possible to continue to provide // because after fileh is down, it is not possible to continue to provide
...@@ -1193,7 +1219,7 @@ error _Mapping::_remmapblk(int64_t blk, zodb::Tid at) { ...@@ -1193,7 +1219,7 @@ error _Mapping::_remmapblk(int64_t blk, zodb::Tid at) {
ASSERT(mmap->blk_start <= blk && blk < mmap->blk_stop()); ASSERT(mmap->blk_start <= blk && blk < mmap->blk_stop());
// a mmapping is efaulted only for closed files, i.e. fileh is removed from wconn._filehTab // a mmapping is efaulted only for closed files, i.e. fileh is removed from wconn._filehTab
// -> pinner should not see the fileh and so shuold not see this mapping. // -> pinner should not see the fileh and so should not see this mapping.
ASSERT(!mmap->efaulted); ASSERT(!mmap->efaulted);
uint8_t *blkmem = mmap->mem_start + (blk - mmap->blk_start)*f->blksize; uint8_t *blkmem = mmap->mem_start + (blk - mmap->blk_start)*f->blksize;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment