Commit e64f0e0b authored by Kirill Smelkov

wcfs: Switch filesystem to EIO mode on zwatcher failure

Currently a zwatcher failure leads to wcfs starting to provide stale data
instead of up-to-date data. Fix that by detecting zwatcher failures and
explicitly switching the filesystem to a mode where any access to
anything returns "input/output error".

Zwatcher can fail, for example, when it cannot retrieve transactions from
the ZODB storage, or for any other reason. With this patch we make sure
such a failure does not go unnoticed.
parent 323be34a
// Copyright (C) 2018-2021 Nexedi SA and Contributors.
// Copyright (C) 2018-2024 Nexedi SA and Contributors.
// Kirill Smelkov <kirr@nexedi.com>
//
// This program is free software: you can Use, Study, Modify and Redistribute
......@@ -428,6 +428,13 @@ func (f *skFile) Release() {
}
// fatalEIO announces the switch of the filesystem into EIO mode and then
// terminates the program.
//
// The switch is carried out by exiting: log.Fatal terminates the program,
// and after that any attempt to access the was-mounted filesystem starts to
// return ENOTCONN.
func fatalEIO() {
	const msg = "switching filesystem to EIO mode"
	log.Fatal(msg)
}
// ---- parsing ----
// parseWatchFrame parses line going through /head/watch into (stream, msg)
......
......@@ -2560,8 +2560,8 @@ func _main() (err error) {
err = root.zwatcher(serveCtx, zwatchq)
if errors.Cause(err) != context.Canceled {
log.Error(err)
log.Errorf("zwatcher failed -> switching filesystem to EIO mode (TODO)")
// TODO: switch fs to EIO mode
log.Error("zwatcher failed")
fatalEIO()
}
// wait for unmount
......
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2022 Nexedi SA and Contributors.
# Copyright (C) 2018-2024 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
......@@ -44,7 +44,7 @@ import sys, os, os.path, subprocess
import six
from six.moves._thread import get_ident as gettid
from time import gmtime
from errno import EINVAL, ENOTCONN
from errno import EINVAL, ENOTCONN, ECONNABORTED
from resource import setrlimit, getrlimit, RLIMIT_MEMLOCK
from golang import go, chan, select, func, defer, error, b
from golang import context, errors, sync, time
......@@ -1824,6 +1824,46 @@ def test_wcfs_watch_2files():
# ----------------------------------------
# verify that wcfs switches to EIO mode after zwatcher failure.
# in EIO mode accessing anything on the filesystem returns ENOTCONN error.
@func
def test_wcfs_eio_after_zwatcher_fail(capfd):
    t = tDB()
    zf = t.zfile

    # on exit: closing the test database is itself expected to fail with ENOTCONN
    def _close_expect_enotconn():
        with raises(IOError) as excinfo:
            t.close()
        assert excinfo.value.errno == ENOTCONN
    defer(_close_expect_enotconn)

    # instead of simulating e.g. ZODB server failure we utilize the fact that
    # currently zwatcher fails when there is ZBigFile epoch
    t.wc._stat("head/bigfile/%s" % h(zf._p_oid))    # wcfs starts to track zf
    zf.blksize += 1
    with raises(IOError) as excinfo:
        t.commit()
    # transaction.commit goes ok, but reading from already opened .wcfs/zhead
    # after commit returns ECONNABORTED
    assert excinfo.value.errno == ECONNABORTED

    # inspect what was captured on stderr
    captured = capfd.readouterr()
    assert not ready(t._wcfuseaborted)  # wcfs might have been killed on overall test timeout
    for unexpected in ("test timed out",
                       "aborting wcfs fuse connection to unblock"):
        assert unexpected not in captured.err
    for expected in ("zwatcher failed",
                     "switching filesystem to EIO mode"):
        assert expected in captured.err

    # verify that accessing any file returns ENOTCONN after the switch
    def checkeio(path):
        with raises(IOError) as excinfo:
            t.wc._read(path)
        assert excinfo.value.errno == ENOTCONN

    for path in (".wcfs/zurl",
                 "head/at",
                 "head/bigfile/%s" % h(zf._p_oid),
                 "anything"):
        checkeio(path)
# verify that wcfs does not panic with "no current transaction" / "at out of
# bounds" on read/invalidate/watch codepaths.
@func
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment