Commit eb43bbac authored by Linus Torvalds

Merge tag 'dlm-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm

Pull dlm updates from David Teigland:

 - Delay the cleanup of interrupted posix lock requests until the user
   space result arrives. Previously, the immediate cleanup would lead to
   extraneous warnings when the result arrived. (A rough sketch of the
   new flow appears just before the diff below.)

 - Tracepoint improvements, e.g. adding the lock resource name.

 - Delay the completion of lockspace creation until one full recovery
   cycle has completed. This allows more error cases to be returned to
   the caller.

 - Remove warnings from the locking layer about delayed network replies.
   The recently added midcomms warnings are much more useful.

 - Begin the process of deprecating two unused lock-timeout-related
   features. These features now require enabling via a Kconfig option,
   and enabling them triggers deprecation warnings. We expect to remove
   the code in v6.2. (A minimal sketch of the compile-out pattern
   follows this list.)

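The deprecation works by compiling the timeout code out entirely unless
CONFIG_DLM_DEPRECATED_API is selected: the real declarations sit under
#ifdef, callers see static inline no-op stubs otherwise, and the first use
of a deprecated knob prints a one-time warning. Below is a minimal,
self-contained sketch of that compile-out pattern; ENABLE_DEPRECATED_API,
arm_lock_timeout() and warn_deprecated_once() are illustrative stand-ins,
not the actual fs/dlm symbols.

/* Minimal sketch of the compile-out pattern; build with
 * -DENABLE_DEPRECATED_API to keep the legacy path, omit it to get
 * the no-op stubs. All names here are illustrative, not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

#ifdef ENABLE_DEPRECATED_API
/* print the deprecation notice only on first use, like pr_warn_once() */
static void warn_deprecated_once(void)
{
	static bool warned;

	if (!warned) {
		warned = true;
		fprintf(stderr,
			"WARNING: the lock timeout feature is deprecated\n");
	}
}

/* real implementation: built only when the option is selected */
static void arm_lock_timeout(unsigned long timeout_cs)
{
	warn_deprecated_once();
	printf("timeout armed for %lu cs\n", timeout_cs);
}
#else
/* stub: callers stay unchanged and the legacy code compiles away */
static inline void arm_lock_timeout(unsigned long timeout_cs) { }
#endif

int main(void)
{
	arm_lock_timeout(500);	/* no-op unless -DENABLE_DEPRECATED_API */
	return 0;
}
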
* tag 'dlm-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm:
  fs: dlm: move kref_put assert for lkb structs
  fs: dlm: don't use deprecated timeout features by default
  fs: dlm: add deprecation Kconfig and warnings for timeouts
  fs: dlm: remove timeout from dlm_user_adopt_orphan
  fs: dlm: remove waiter warnings
  fs: dlm: fix grammar in lowcomms output
  fs: dlm: add comment about lkb IFL flags
  fs: dlm: handle recovery result outside of ls_recover
  fs: dlm: make new_lockspace() wait until recovery completes
  fs: dlm: call dlm_lsop_recover_prep once
  fs: dlm: update comments about recovery and membership handling
  fs: dlm: add resource name to tracepoints
  fs: dlm: remove additional dereference of lksb
  fs: dlm: change ast and bast trace order
  fs: dlm: change posix lock sigint handling
  fs: dlm: use dlm_plock_info for do_unlock_close
  fs: dlm: change plock interrupted message to debug again
  fs: dlm: add pid to debug log
  fs: dlm: plock use list_first_entry
parents 3d7cb6b0 95858989
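
Before the diff itself, a rough sketch of the interrupted-plock change
mentioned above: instead of deleting the op when the waiting task catches a
signal, the op is only marked, and the cleanup (an unlock/close) happens
later when dlm_controld's reply shows up in dev_write(). The struct and
function names below (pending_op, wait_for_reply, reply_arrived) are
simplified stand-ins for illustration, not the fs/dlm/plock.c code.

#include <stdbool.h>
#include <stdio.h>

struct pending_op {
	bool done;	/* reply from user space has been processed */
	bool sigint;	/* waiter was interrupted before the reply came */
	int result;
};

/* waiter side: on interruption, only mark the op instead of freeing it */
static int wait_for_reply(struct pending_op *op, bool interrupted)
{
	if (interrupted && !op->done) {
		op->sigint = true;	/* cleanup is deferred to the reply */
		return -1;
	}
	return op->result;
}

/* reply side: if the waiter went away, do the unlock/close cleanup here */
static void reply_arrived(struct pending_op *op, int result)
{
	op->result = result;
	op->done = true;

	if (op->sigint) {
		printf("late reply for interrupted op, cleaning up\n");
		/* a do_unlock_close()-style cleanup and free would go here */
		return;
	}
	printf("reply delivered to waiter: %d\n", result);
}

int main(void)
{
	struct pending_op op = { 0 };

	if (wait_for_reply(&op, true) < 0)	/* simulate SIGINT */
		printf("wait interrupted, op left on the list\n");
	reply_arrived(&op, 0);			/* result arrives later */
	return 0;
}
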
......@@ -9,6 +9,15 @@ menuconfig DLM
A general purpose distributed lock manager for kernel or userspace
applications.
config DLM_DEPRECATED_API
bool "DLM deprecated API"
depends on DLM
help
Enables deprecated DLM timeout features that will be removed in
later Linux kernel releases.
If you are unsure, say N.
config DLM_DEBUG
bool "DLM debugging"
depends on DLM
......
......@@ -9,7 +9,6 @@ dlm-y := ast.o \
member.o \
memory.o \
midcomms.o \
netlink.o \
lowcomms.o \
plock.o \
rcom.o \
......@@ -18,5 +17,6 @@ dlm-y := ast.o \
requestqueue.o \
user.o \
util.o
dlm-$(CONFIG_DLM_DEPRECATED_API) += netlink.o
dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o
......@@ -255,13 +255,13 @@ void dlm_callback_work(struct work_struct *work)
if (callbacks[i].flags & DLM_CB_SKIP) {
continue;
} else if (callbacks[i].flags & DLM_CB_BAST) {
bastfn(lkb->lkb_astparam, callbacks[i].mode);
trace_dlm_bast(ls, lkb, callbacks[i].mode);
bastfn(lkb->lkb_astparam, callbacks[i].mode);
} else if (callbacks[i].flags & DLM_CB_CAST) {
lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
trace_dlm_ast(ls, lkb);
castfn(lkb->lkb_astparam);
trace_dlm_ast(ls, lkb, lkb->lkb_lksb);
}
}
......
......@@ -75,8 +75,9 @@ struct dlm_cluster {
unsigned int cl_log_info;
unsigned int cl_protocol;
unsigned int cl_mark;
#ifdef CONFIG_DLM_DEPRECATED_API
unsigned int cl_timewarn_cs;
unsigned int cl_waitwarn_us;
#endif
unsigned int cl_new_rsb_count;
unsigned int cl_recover_callbacks;
char cl_cluster_name[DLM_LOCKSPACE_LEN];
......@@ -102,8 +103,9 @@ enum {
CLUSTER_ATTR_LOG_INFO,
CLUSTER_ATTR_PROTOCOL,
CLUSTER_ATTR_MARK,
#ifdef CONFIG_DLM_DEPRECATED_API
CLUSTER_ATTR_TIMEWARN_CS,
CLUSTER_ATTR_WAITWARN_US,
#endif
CLUSTER_ATTR_NEW_RSB_COUNT,
CLUSTER_ATTR_RECOVER_CALLBACKS,
CLUSTER_ATTR_CLUSTER_NAME,
......@@ -224,8 +226,9 @@ CLUSTER_ATTR(log_debug, NULL);
CLUSTER_ATTR(log_info, NULL);
CLUSTER_ATTR(protocol, dlm_check_protocol_and_dlm_running);
CLUSTER_ATTR(mark, NULL);
#ifdef CONFIG_DLM_DEPRECATED_API
CLUSTER_ATTR(timewarn_cs, dlm_check_zero);
CLUSTER_ATTR(waitwarn_us, NULL);
#endif
CLUSTER_ATTR(new_rsb_count, NULL);
CLUSTER_ATTR(recover_callbacks, NULL);
......@@ -240,8 +243,9 @@ static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_LOG_INFO] = &cluster_attr_log_info,
[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol,
[CLUSTER_ATTR_MARK] = &cluster_attr_mark,
#ifdef CONFIG_DLM_DEPRECATED_API
[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs,
[CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us,
#endif
[CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count,
[CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks,
[CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name,
......@@ -432,8 +436,9 @@ static struct config_group *make_cluster(struct config_group *g,
cl->cl_log_debug = dlm_config.ci_log_debug;
cl->cl_log_info = dlm_config.ci_log_info;
cl->cl_protocol = dlm_config.ci_protocol;
#ifdef CONFIG_DLM_DEPRECATED_API
cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
#endif
cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count;
cl->cl_recover_callbacks = dlm_config.ci_recover_callbacks;
memcpy(cl->cl_cluster_name, dlm_config.ci_cluster_name,
......@@ -954,8 +959,9 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_LOG_INFO 1
#define DEFAULT_PROTOCOL DLM_PROTO_TCP
#define DEFAULT_MARK 0
#ifdef CONFIG_DLM_DEPRECATED_API
#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
#define DEFAULT_WAITWARN_US 0
#endif
#define DEFAULT_NEW_RSB_COUNT 128
#define DEFAULT_RECOVER_CALLBACKS 0
#define DEFAULT_CLUSTER_NAME ""
......@@ -971,8 +977,9 @@ struct dlm_config_info dlm_config = {
.ci_log_info = DEFAULT_LOG_INFO,
.ci_protocol = DEFAULT_PROTOCOL,
.ci_mark = DEFAULT_MARK,
#ifdef CONFIG_DLM_DEPRECATED_API
.ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
.ci_waitwarn_us = DEFAULT_WAITWARN_US,
#endif
.ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT,
.ci_recover_callbacks = DEFAULT_RECOVER_CALLBACKS,
.ci_cluster_name = DEFAULT_CLUSTER_NAME
......
......@@ -37,8 +37,9 @@ struct dlm_config_info {
int ci_log_info;
int ci_protocol;
int ci_mark;
#ifdef CONFIG_DLM_DEPRECATED_API
int ci_timewarn_cs;
int ci_waitwarn_us;
#endif
int ci_new_rsb_count;
int ci_recover_callbacks;
char ci_cluster_name[DLM_LOCKSPACE_LEN];
......
......@@ -145,7 +145,9 @@ struct dlm_args {
void (*bastfn) (void *astparam, int mode);
int mode;
struct dlm_lksb *lksb;
#ifdef CONFIG_DLM_DEPRECATED_API
unsigned long timeout;
#endif
};
......@@ -203,10 +205,20 @@ struct dlm_args {
#define DLM_IFL_OVERLAP_UNLOCK 0x00080000
#define DLM_IFL_OVERLAP_CANCEL 0x00100000
#define DLM_IFL_ENDOFLIFE 0x00200000
#ifdef CONFIG_DLM_DEPRECATED_API
#define DLM_IFL_WATCH_TIMEWARN 0x00400000
#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
#endif
#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
/* least significant 2 bytes are message changed, they are full transmitted
* but at receive side only the 2 bytes LSB will be set.
*
* Even wireshark dlm dissector does only evaluate the lower bytes and note
* that they may not be used on transceiver side, we assume the higher bytes
* are for internal use or reserved so long they are not parsed on receiver
* side.
*/
#define DLM_IFL_USER 0x00000001
#define DLM_IFL_ORPHAN 0x00000002
......@@ -249,10 +261,12 @@ struct dlm_lkb {
struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */
struct list_head lkb_wait_reply; /* waiting for remote reply */
struct list_head lkb_ownqueue; /* list of locks for a process */
struct list_head lkb_time_list;
ktime_t lkb_timestamp;
ktime_t lkb_wait_time;
#ifdef CONFIG_DLM_DEPRECATED_API
struct list_head lkb_time_list;
unsigned long lkb_timeout_cs;
#endif
struct mutex lkb_cb_mutex;
struct work_struct lkb_cb_work;
......@@ -568,8 +582,10 @@ struct dlm_ls {
struct mutex ls_orphans_mutex;
struct list_head ls_orphans;
#ifdef CONFIG_DLM_DEPRECATED_API
struct mutex ls_timeout_mutex;
struct list_head ls_timeout;
#endif
spinlock_t ls_new_rsb_spin;
int ls_new_rsb_count;
......@@ -606,8 +622,8 @@ struct dlm_ls {
wait_queue_head_t ls_uevent_wait; /* user part of join/leave */
int ls_uevent_result;
struct completion ls_members_done;
int ls_members_result;
struct completion ls_recovery_done;
int ls_recovery_result;
struct miscdevice ls_device;
......@@ -688,7 +704,9 @@ struct dlm_ls {
#define LSFL_RCOM_READY 5
#define LSFL_RCOM_WAIT 6
#define LSFL_UEVENT_WAIT 7
#ifdef CONFIG_DLM_DEPRECATED_API
#define LSFL_TIMEWARN 8
#endif
#define LSFL_CB_DELAY 9
#define LSFL_NODIR 10
......@@ -741,9 +759,15 @@ static inline int dlm_no_directory(struct dlm_ls *ls)
return test_bit(LSFL_NODIR, &ls->ls_flags);
}
#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_netlink_init(void);
void dlm_netlink_exit(void);
void dlm_timeout_warn(struct dlm_lkb *lkb);
#else
static inline int dlm_netlink_init(void) { return 0; }
static inline void dlm_netlink_exit(void) { };
static inline void dlm_timeout_warn(struct dlm_lkb *lkb) { };
#endif
int dlm_plock_init(void);
void dlm_plock_exit(void);
......
......@@ -296,12 +296,14 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
#ifdef CONFIG_DLM_DEPRECATED_API
/* if the operation was a cancel, then return -DLM_ECANCEL, if a
timeout caused the cancel then return -ETIMEDOUT */
if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
rv = -ETIMEDOUT;
}
#endif
if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
......@@ -1210,7 +1212,9 @@ static int _create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret,
kref_init(&lkb->lkb_ref);
INIT_LIST_HEAD(&lkb->lkb_ownqueue);
INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
#ifdef CONFIG_DLM_DEPRECATED_API
INIT_LIST_HEAD(&lkb->lkb_time_list);
#endif
INIT_LIST_HEAD(&lkb->lkb_cb_list);
mutex_init(&lkb->lkb_cb_mutex);
INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);
......@@ -1306,6 +1310,13 @@ static inline void hold_lkb(struct dlm_lkb *lkb)
kref_get(&lkb->lkb_ref);
}
static void unhold_lkb_assert(struct kref *kref)
{
struct dlm_lkb *lkb = container_of(kref, struct dlm_lkb, lkb_ref);
DLM_ASSERT(false, dlm_print_lkb(lkb););
}
/* This is called when we need to remove a reference and are certain
it's not the last ref. e.g. del_lkb is always called between a
find_lkb/put_lkb and is always the inverse of a previous add_lkb.
......@@ -1313,9 +1324,7 @@ static inline void hold_lkb(struct dlm_lkb *lkb)
static inline void unhold_lkb(struct dlm_lkb *lkb)
{
int rv;
rv = kref_put(&lkb->lkb_ref, kill_lkb);
DLM_ASSERT(!rv, dlm_print_lkb(lkb););
kref_put(&lkb->lkb_ref, unhold_lkb_assert);
}
static void lkb_add_ordered(struct list_head *new, struct list_head *head,
......@@ -1402,75 +1411,6 @@ static int msg_reply_type(int mstype)
return -1;
}
static int nodeid_warned(int nodeid, int num_nodes, int *warned)
{
int i;
for (i = 0; i < num_nodes; i++) {
if (!warned[i]) {
warned[i] = nodeid;
return 0;
}
if (warned[i] == nodeid)
return 1;
}
return 0;
}
void dlm_scan_waiters(struct dlm_ls *ls)
{
struct dlm_lkb *lkb;
s64 us;
s64 debug_maxus = 0;
u32 debug_scanned = 0;
u32 debug_expired = 0;
int num_nodes = 0;
int *warned = NULL;
if (!dlm_config.ci_waitwarn_us)
return;
mutex_lock(&ls->ls_waiters_mutex);
list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
if (!lkb->lkb_wait_time)
continue;
debug_scanned++;
us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
if (us < dlm_config.ci_waitwarn_us)
continue;
lkb->lkb_wait_time = 0;
debug_expired++;
if (us > debug_maxus)
debug_maxus = us;
if (!num_nodes) {
num_nodes = ls->ls_num_nodes;
warned = kcalloc(num_nodes, sizeof(int), GFP_KERNEL);
}
if (!warned)
continue;
if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned))
continue;
log_error(ls, "waitwarn %x %lld %d us check connection to "
"node %d", lkb->lkb_id, (long long)us,
dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
}
mutex_unlock(&ls->ls_waiters_mutex);
kfree(warned);
if (debug_expired)
log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
debug_scanned, debug_expired,
dlm_config.ci_waitwarn_us, (long long)debug_maxus);
}
/* add/remove lkb from global waiters list of lkb's waiting for
a reply from a remote node */
......@@ -1514,7 +1454,6 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
lkb->lkb_wait_count++;
lkb->lkb_wait_type = mstype;
lkb->lkb_wait_time = ktime_get();
lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
hold_lkb(lkb);
list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
......@@ -1842,6 +1781,7 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
}
}
#ifdef CONFIG_DLM_DEPRECATED_API
static void add_timeout(struct dlm_lkb *lkb)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
......@@ -1962,17 +1902,11 @@ void dlm_adjust_timeouts(struct dlm_ls *ls)
list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
mutex_unlock(&ls->ls_timeout_mutex);
if (!dlm_config.ci_waitwarn_us)
return;
mutex_lock(&ls->ls_waiters_mutex);
list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
if (ktime_to_us(lkb->lkb_wait_time))
lkb->lkb_wait_time = ktime_get();
}
mutex_unlock(&ls->ls_waiters_mutex);
}
#else
static void add_timeout(struct dlm_lkb *lkb) { }
static void del_timeout(struct dlm_lkb *lkb) { }
#endif
/* lkb is master or local copy */
......@@ -2837,12 +2771,20 @@ static void confirm_master(struct dlm_rsb *r, int error)
}
}
#ifdef CONFIG_DLM_DEPRECATED_API
static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
int namelen, unsigned long timeout_cs,
void (*ast) (void *astparam),
void *astparam,
void (*bast) (void *astparam, int mode),
struct dlm_args *args)
#else
static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
int namelen, void (*ast)(void *astparam),
void *astparam,
void (*bast)(void *astparam, int mode),
struct dlm_args *args)
#endif
{
int rv = -EINVAL;
......@@ -2895,7 +2837,9 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
args->astfn = ast;
args->astparam = astparam;
args->bastfn = bast;
#ifdef CONFIG_DLM_DEPRECATED_API
args->timeout = timeout_cs;
#endif
args->mode = mode;
args->lksb = lksb;
rv = 0;
......@@ -2951,7 +2895,9 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
lkb->lkb_lksb = args->lksb;
lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
lkb->lkb_ownpid = (int) current->pid;
#ifdef CONFIG_DLM_DEPRECATED_API
lkb->lkb_timeout_cs = args->timeout;
#endif
rv = 0;
out:
if (rv)
......@@ -3472,10 +3418,15 @@ int dlm_lock(dlm_lockspace_t *lockspace,
if (error)
goto out;
trace_dlm_lock_start(ls, lkb, mode, flags);
trace_dlm_lock_start(ls, lkb, name, namelen, mode, flags);
#ifdef CONFIG_DLM_DEPRECATED_API
error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
astarg, bast, &args);
#else
error = set_lock_args(mode, lksb, flags, namelen, ast, astarg, bast,
&args);
#endif
if (error)
goto out_put;
......@@ -3487,7 +3438,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
if (error == -EINPROGRESS)
error = 0;
out_put:
trace_dlm_lock_end(ls, lkb, mode, flags, error);
trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error);
if (convert || error)
__put_lkb(ls, lkb);
......@@ -5839,9 +5790,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
return 0;
}
#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
int mode, uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs)
#else
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
int mode, uint32_t flags, void *name, unsigned int namelen)
#endif
{
struct dlm_lkb *lkb;
struct dlm_args args;
......@@ -5864,8 +5820,13 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
goto out;
}
}
#ifdef CONFIG_DLM_DEPRECATED_API
error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
fake_astfn, ua, fake_bastfn, &args);
#else
error = set_lock_args(mode, &ua->lksb, flags, namelen, fake_astfn, ua,
fake_bastfn, &args);
#endif
if (error) {
kfree(ua->lksb.sb_lvbptr);
ua->lksb.sb_lvbptr = NULL;
......@@ -5904,9 +5865,14 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
return error;
}
#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
unsigned long timeout_cs)
#else
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in)
#endif
{
struct dlm_lkb *lkb;
struct dlm_args args;
......@@ -5941,8 +5907,13 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
ua->bastaddr = ua_tmp->bastaddr;
ua->user_lksb = ua_tmp->user_lksb;
#ifdef CONFIG_DLM_DEPRECATED_API
error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
fake_astfn, ua, fake_bastfn, &args);
#else
error = set_lock_args(mode, &ua->lksb, flags, 0, fake_astfn, ua,
fake_bastfn, &args);
#endif
if (error)
goto out_put;
......@@ -5966,7 +5937,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs, uint32_t *lkid)
uint32_t *lkid)
{
struct dlm_lkb *lkb = NULL, *iter;
struct dlm_user_args *ua;
......
......@@ -24,9 +24,15 @@ int dlm_put_lkb(struct dlm_lkb *lkb);
void dlm_scan_rsbs(struct dlm_ls *ls);
int dlm_lock_recovery_try(struct dlm_ls *ls);
void dlm_unlock_recovery(struct dlm_ls *ls);
void dlm_scan_waiters(struct dlm_ls *ls);
#ifdef CONFIG_DLM_DEPRECATED_API
void dlm_scan_timeout(struct dlm_ls *ls);
void dlm_adjust_timeouts(struct dlm_ls *ls);
#else
static inline void dlm_scan_timeout(struct dlm_ls *ls) { }
static inline void dlm_adjust_timeouts(struct dlm_ls *ls) { }
#endif
int dlm_master_lookup(struct dlm_ls *ls, int nodeid, char *name, int len,
unsigned int flags, int *r_nodeid, int *result);
......@@ -41,15 +47,22 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls);
int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
#ifdef CONFIG_DLM_DEPRECATED_API
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs);
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
unsigned long timeout_cs);
#else
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
uint32_t flags, void *name, unsigned int namelen);
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in);
#endif
int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs, uint32_t *lkid);
uint32_t *lkid);
int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
uint32_t flags, uint32_t lkid, char *lvb_in);
int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
......
......@@ -275,7 +275,6 @@ static int dlm_scand(void *data)
ls->ls_scan_time = jiffies;
dlm_scan_rsbs(ls);
dlm_scan_timeout(ls);
dlm_scan_waiters(ls);
dlm_unlock_recovery(ls);
} else {
ls->ls_scan_time += HZ;
......@@ -490,13 +489,28 @@ static int new_lockspace(const char *name, const char *cluster,
ls->ls_ops_arg = ops_arg;
}
if (flags & DLM_LSFL_TIMEWARN)
#ifdef CONFIG_DLM_DEPRECATED_API
if (flags & DLM_LSFL_TIMEWARN) {
pr_warn_once("===============================================================\n"
"WARNING: the dlm DLM_LSFL_TIMEWARN flag is being deprecated and\n"
" will be removed in v6.2!\n"
" Inclusive DLM_LSFL_TIMEWARN define in UAPI header!\n"
"===============================================================\n");
set_bit(LSFL_TIMEWARN, &ls->ls_flags);
}
/* ls_exflags are forced to match among nodes, and we don't
need to require all nodes to have some flags set */
* need to require all nodes to have some flags set
*/
ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
DLM_LSFL_NEWEXCL));
#else
/* ls_exflags are forced to match among nodes, and we don't
* need to require all nodes to have some flags set
*/
ls->ls_exflags = (flags & ~(DLM_LSFL_FS | DLM_LSFL_NEWEXCL));
#endif
size = READ_ONCE(dlm_config.ci_rsbtbl_size);
ls->ls_rsbtbl_size = size;
......@@ -527,8 +541,10 @@ static int new_lockspace(const char *name, const char *cluster,
mutex_init(&ls->ls_waiters_mutex);
INIT_LIST_HEAD(&ls->ls_orphans);
mutex_init(&ls->ls_orphans_mutex);
#ifdef CONFIG_DLM_DEPRECATED_API
INIT_LIST_HEAD(&ls->ls_timeout);
mutex_init(&ls->ls_timeout_mutex);
#endif
INIT_LIST_HEAD(&ls->ls_new_rsb);
spin_lock_init(&ls->ls_new_rsb_spin);
......@@ -548,8 +564,8 @@ static int new_lockspace(const char *name, const char *cluster,
init_waitqueue_head(&ls->ls_uevent_wait);
ls->ls_uevent_result = 0;
init_completion(&ls->ls_members_done);
ls->ls_members_result = -1;
init_completion(&ls->ls_recovery_done);
ls->ls_recovery_result = -1;
mutex_init(&ls->ls_cb_mutex);
INIT_LIST_HEAD(&ls->ls_cb_delay);
......@@ -645,8 +661,9 @@ static int new_lockspace(const char *name, const char *cluster,
if (error)
goto out_recoverd;
wait_for_completion(&ls->ls_members_done);
error = ls->ls_members_result;
/* wait until recovery is successful or failed */
wait_for_completion(&ls->ls_recovery_done);
error = ls->ls_recovery_result;
if (error)
goto out_members;
......
......@@ -529,7 +529,7 @@ static void lowcomms_write_space(struct sock *sk)
return;
if (!test_and_set_bit(CF_CONNECTED, &con->flags)) {
log_print("successful connected to node %d", con->nodeid);
log_print("connected to node %d", con->nodeid);
queue_work(send_workqueue, &con->swork);
return;
}
......@@ -1931,7 +1931,7 @@ static int dlm_sctp_connect(struct connection *con, struct socket *sock,
return ret;
if (!test_and_set_bit(CF_CONNECTED, &con->flags))
log_print("successful connected to node %d", con->nodeid);
log_print("connected to node %d", con->nodeid);
return 0;
}
......
......@@ -534,7 +534,11 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
int i, error, neg = 0, low = -1;
/* previously removed members that we've not finished removing need to
count as a negative change so the "neg" recovery steps will happen */
* count as a negative change so the "neg" recovery steps will happen
*
* This functionality must report all member changes to lsops or
* midcomms layer and must never return before.
*/
list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
log_rinfo(ls, "prev removed member %d", memb->nodeid);
......@@ -583,19 +587,6 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
*neg_out = neg;
error = ping_members(ls);
/* error -EINTR means that a new recovery action is triggered.
* We ignore this recovery action and let run the new one which might
* have new member configuration.
*/
if (error == -EINTR)
error = 0;
/* new_lockspace() may be waiting to know if the config
* is good or bad
*/
ls->ls_members_result = error;
complete(&ls->ls_members_done);
log_rinfo(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes);
return error;
}
......@@ -675,7 +666,16 @@ int dlm_ls_stop(struct dlm_ls *ls)
if (!ls->ls_recover_begin)
ls->ls_recover_begin = jiffies;
dlm_lsop_recover_prep(ls);
/* call recover_prep ops only once and not multiple times
* for each possible dlm_ls_stop() when recovery is already
* stopped.
*
* If we successful was able to clear LSFL_RUNNING bit and
* it was set we know it is the first dlm_ls_stop() call.
*/
if (new)
dlm_lsop_recover_prep(ls);
return 0;
}
......
......@@ -29,6 +29,8 @@ struct plock_async_data {
struct plock_op {
struct list_head list;
int done;
/* if lock op got interrupted while waiting dlm_controld reply */
bool sigint;
struct dlm_plock_info info;
/* if set indicates async handling */
struct plock_async_data *data;
......@@ -79,8 +81,7 @@ static void send_op(struct plock_op *op)
abandoned waiter. So, we have to insert the unlock-close when the
lock call is interrupted. */
static void do_unlock_close(struct dlm_ls *ls, u64 number,
struct file *file, struct file_lock *fl)
static void do_unlock_close(const struct dlm_plock_info *info)
{
struct plock_op *op;
......@@ -89,15 +90,12 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number,
return;
op->info.optype = DLM_PLOCK_OP_UNLOCK;
op->info.pid = fl->fl_pid;
op->info.fsid = ls->ls_global_id;
op->info.number = number;
op->info.pid = info->pid;
op->info.fsid = info->fsid;
op->info.number = info->number;
op->info.start = 0;
op->info.end = OFFSET_MAX;
if (fl->fl_lmops && fl->fl_lmops->lm_grant)
op->info.owner = (__u64) fl->fl_pid;
else
op->info.owner = (__u64)(long) fl->fl_owner;
op->info.owner = info->owner;
op->info.flags |= DLM_PLOCK_FL_CLOSE;
send_op(op);
......@@ -161,16 +159,24 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
rv = wait_event_interruptible(recv_wq, (op->done != 0));
if (rv == -ERESTARTSYS) {
spin_lock(&ops_lock);
list_del(&op->list);
/* recheck under ops_lock if we got a done != 0,
* if so this interrupt case should be ignored
*/
if (op->done != 0) {
spin_unlock(&ops_lock);
goto do_lock_wait;
}
op->sigint = true;
spin_unlock(&ops_lock);
log_print("%s: wait interrupted %x %llx, op removed",
log_debug(ls, "%s: wait interrupted %x %llx pid %d",
__func__, ls->ls_global_id,
(unsigned long long)number);
dlm_release_plock_op(op);
do_unlock_close(ls, number, file, fl);
(unsigned long long)number, op->info.pid);
goto out;
}
do_lock_wait:
WARN_ON(!list_empty(&op->list));
rv = op->info.rv;
......@@ -378,7 +384,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
spin_lock(&ops_lock);
if (!list_empty(&send_list)) {
op = list_entry(send_list.next, struct plock_op, list);
op = list_first_entry(&send_list, struct plock_op, list);
if (op->info.flags & DLM_PLOCK_FL_CLOSE)
list_del(&op->list);
else
......@@ -425,6 +431,19 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
if (iter->info.fsid == info.fsid &&
iter->info.number == info.number &&
iter->info.owner == info.owner) {
if (iter->sigint) {
list_del(&iter->list);
spin_unlock(&ops_lock);
pr_debug("%s: sigint cleanup %x %llx pid %d",
__func__, iter->info.fsid,
(unsigned long long)iter->info.number,
iter->info.pid);
do_unlock_close(&iter->info);
memcpy(&iter->info, &info, sizeof(info));
dlm_release_plock_op(iter);
return count;
}
list_del_init(&iter->list);
memcpy(&iter->info, &info, sizeof(info));
if (iter->data)
......@@ -443,7 +462,7 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
else
wake_up(&recv_wq);
} else
log_print("%s: no op %x %llx - may got interrupted?", __func__,
log_print("%s: no op %x %llx", __func__,
info.fsid, (unsigned long long)info.number);
return count;
}
......
......@@ -70,6 +70,10 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
/*
* Add or remove nodes from the lockspace's ls_nodes list.
*
* Due to the fact that we must report all membership changes to lsops
* or midcomms layer, it is not permitted to abort ls_recover() until
* this is done.
*/
error = dlm_recover_members(ls, rv, &neg);
......@@ -239,14 +243,12 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
jiffies_to_msecs(jiffies - start));
mutex_unlock(&ls->ls_recoverd_active);
dlm_lsop_recover_done(ls);
return 0;
fail:
dlm_release_root_list(ls);
log_rinfo(ls, "dlm_recover %llu error %d",
(unsigned long long)rv->seq, error);
mutex_unlock(&ls->ls_recoverd_active);
return error;
}
......@@ -257,6 +259,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
static void do_ls_recovery(struct dlm_ls *ls)
{
struct dlm_recover *rv = NULL;
int error;
spin_lock(&ls->ls_recover_lock);
rv = ls->ls_recover_args;
......@@ -266,7 +269,31 @@ static void do_ls_recovery(struct dlm_ls *ls)
spin_unlock(&ls->ls_recover_lock);
if (rv) {
ls_recover(ls, rv);
error = ls_recover(ls, rv);
switch (error) {
case 0:
ls->ls_recovery_result = 0;
complete(&ls->ls_recovery_done);
dlm_lsop_recover_done(ls);
break;
case -EINTR:
/* if recovery was interrupted -EINTR we wait for the next
* ls_recover() iteration until it hopefully succeeds.
*/
log_rinfo(ls, "%s %llu interrupted and should be queued to run again",
__func__, (unsigned long long)rv->seq);
break;
default:
log_rinfo(ls, "%s %llu error %d", __func__,
(unsigned long long)rv->seq, error);
/* let new_lockspace() get aware of critical error */
ls->ls_recovery_result = error;
complete(&ls->ls_recovery_done);
break;
}
kfree(rv->nodes);
kfree(rv);
}
......
......@@ -250,6 +250,14 @@ static int device_user_lock(struct dlm_user_proc *proc,
goto out;
}
#ifdef CONFIG_DLM_DEPRECATED_API
if (params->timeout)
pr_warn_once("========================================================\n"
"WARNING: the lkb timeout feature is being deprecated and\n"
" will be removed in v6.2!\n"
"========================================================\n");
#endif
ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS);
if (!ua)
goto out;
......@@ -262,23 +270,34 @@ static int device_user_lock(struct dlm_user_proc *proc,
ua->xid = params->xid;
if (params->flags & DLM_LKF_CONVERT) {
#ifdef CONFIG_DLM_DEPRECATED_API
error = dlm_user_convert(ls, ua,
params->mode, params->flags,
params->lkid, params->lvb,
(unsigned long) params->timeout);
#else
error = dlm_user_convert(ls, ua,
params->mode, params->flags,
params->lkid, params->lvb);
#endif
} else if (params->flags & DLM_LKF_ORPHAN) {
error = dlm_user_adopt_orphan(ls, ua,
params->mode, params->flags,
params->name, params->namelen,
(unsigned long) params->timeout,
&lkid);
if (!error)
error = lkid;
} else {
#ifdef CONFIG_DLM_DEPRECATED_API
error = dlm_user_request(ls, ua,
params->mode, params->flags,
params->name, params->namelen,
(unsigned long) params->timeout);
#else
error = dlm_user_request(ls, ua,
params->mode, params->flags,
params->name, params->namelen);
#endif
if (!error)
error = ua->lksb.sb_lkid;
}
......
......@@ -49,38 +49,52 @@
/* note: we begin tracing dlm_lock_start() only if ls and lkb are found */
TRACE_EVENT(dlm_lock_start,
TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode,
__u32 flags),
TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name,
unsigned int namelen, int mode, __u32 flags),
TP_ARGS(ls, lkb, mode, flags),
TP_ARGS(ls, lkb, name, namelen, mode, flags),
TP_STRUCT__entry(
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(int, mode)
__field(__u32, flags)
__dynamic_array(unsigned char, res_name,
lkb->lkb_resource ? lkb->lkb_resource->res_length : namelen)
),
TP_fast_assign(
struct dlm_rsb *r;
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->mode = mode;
__entry->flags = flags;
r = lkb->lkb_resource;
if (r)
memcpy(__get_dynamic_array(res_name), r->res_name,
__get_dynamic_array_len(res_name));
else if (name)
memcpy(__get_dynamic_array(res_name), name,
__get_dynamic_array_len(res_name));
),
TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s",
TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_lock_mode(__entry->mode),
show_lock_flags(__entry->flags))
show_lock_flags(__entry->flags),
__print_hex_str(__get_dynamic_array(res_name),
__get_dynamic_array_len(res_name)))
);
TRACE_EVENT(dlm_lock_end,
TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode, __u32 flags,
int error),
TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name,
unsigned int namelen, int mode, __u32 flags, int error),
TP_ARGS(ls, lkb, mode, flags, error),
TP_ARGS(ls, lkb, name, namelen, mode, flags, error),
TP_STRUCT__entry(
__field(__u32, ls_id)
......@@ -88,14 +102,26 @@ TRACE_EVENT(dlm_lock_end,
__field(int, mode)
__field(__u32, flags)
__field(int, error)
__dynamic_array(unsigned char, res_name,
lkb->lkb_resource ? lkb->lkb_resource->res_length : namelen)
),
TP_fast_assign(
struct dlm_rsb *r;
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->mode = mode;
__entry->flags = flags;
r = lkb->lkb_resource;
if (r)
memcpy(__get_dynamic_array(res_name), r->res_name,
__get_dynamic_array_len(res_name));
else if (name)
memcpy(__get_dynamic_array(res_name), name,
__get_dynamic_array_len(res_name));
/* return value will be zeroed in those cases by dlm_lock()
* we do it here again to not introduce more overhead if
* trace isn't running and error reflects the return value.
......@@ -104,12 +130,15 @@ TRACE_EVENT(dlm_lock_end,
__entry->error = 0;
else
__entry->error = error;
),
TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s error=%d",
TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s error=%d res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_lock_mode(__entry->mode),
show_lock_flags(__entry->flags), __entry->error)
show_lock_flags(__entry->flags), __entry->error,
__print_hex_str(__get_dynamic_array(res_name),
__get_dynamic_array_len(res_name)))
);
......@@ -123,42 +152,65 @@ TRACE_EVENT(dlm_bast,
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(int, mode)
__dynamic_array(unsigned char, res_name,
lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
struct dlm_rsb *r;
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->mode = mode;
r = lkb->lkb_resource;
if (r)
memcpy(__get_dynamic_array(res_name), r->res_name,
__get_dynamic_array_len(res_name));
),
TP_printk("ls_id=%u lkb_id=%x mode=%s", __entry->ls_id,
__entry->lkb_id, show_lock_mode(__entry->mode))
TP_printk("ls_id=%u lkb_id=%x mode=%s res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_lock_mode(__entry->mode),
__print_hex_str(__get_dynamic_array(res_name),
__get_dynamic_array_len(res_name)))
);
TRACE_EVENT(dlm_ast,
TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_lksb *lksb),
TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb),
TP_ARGS(ls, lkb, lksb),
TP_ARGS(ls, lkb),
TP_STRUCT__entry(
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(u8, sb_flags)
__field(int, sb_status)
__dynamic_array(unsigned char, res_name,
lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
struct dlm_rsb *r;
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->sb_flags = lksb->sb_flags;
__entry->sb_status = lksb->sb_status;
__entry->sb_flags = lkb->lkb_lksb->sb_flags;
__entry->sb_status = lkb->lkb_lksb->sb_status;
r = lkb->lkb_resource;
if (r)
memcpy(__get_dynamic_array(res_name), r->res_name,
__get_dynamic_array_len(res_name));
),
TP_printk("ls_id=%u lkb_id=%x sb_flags=%s sb_status=%d",
TP_printk("ls_id=%u lkb_id=%x sb_flags=%s sb_status=%d res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_dlm_sb_flags(__entry->sb_flags), __entry->sb_status)
show_dlm_sb_flags(__entry->sb_flags), __entry->sb_status,
__print_hex_str(__get_dynamic_array(res_name),
__get_dynamic_array_len(res_name)))
);
......@@ -173,17 +225,28 @@ TRACE_EVENT(dlm_unlock_start,
__field(__u32, ls_id)
__field(__u32, lkb_id)
__field(__u32, flags)
__dynamic_array(unsigned char, res_name,
lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
struct dlm_rsb *r;
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->flags = flags;
r = lkb->lkb_resource;
if (r)
memcpy(__get_dynamic_array(res_name), r->res_name,
__get_dynamic_array_len(res_name));
),
TP_printk("ls_id=%u lkb_id=%x flags=%s",
TP_printk("ls_id=%u lkb_id=%x flags=%s res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_lock_flags(__entry->flags))
show_lock_flags(__entry->flags),
__print_hex_str(__get_dynamic_array(res_name),
__get_dynamic_array_len(res_name)))
);
......@@ -199,18 +262,29 @@ TRACE_EVENT(dlm_unlock_end,
__field(__u32, lkb_id)
__field(__u32, flags)
__field(int, error)
__dynamic_array(unsigned char, res_name,
lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
),
TP_fast_assign(
struct dlm_rsb *r;
__entry->ls_id = ls->ls_global_id;
__entry->lkb_id = lkb->lkb_id;
__entry->flags = flags;
__entry->error = error;
r = lkb->lkb_resource;
if (r)
memcpy(__get_dynamic_array(res_name), r->res_name,
__get_dynamic_array_len(res_name));
),
TP_printk("ls_id=%u lkb_id=%x flags=%s error=%d",
TP_printk("ls_id=%u lkb_id=%x flags=%s error=%d res_name=%s",
__entry->ls_id, __entry->lkb_id,
show_lock_flags(__entry->flags), __entry->error)
show_lock_flags(__entry->flags), __entry->error,
__print_hex_str(__get_dynamic_array(res_name),
__get_dynamic_array_len(res_name)))
);
......