Commit 4f317748 authored by Amir Shehata's avatar Amir Shehata Committed by Greg Kroah-Hartman

staging/lustre/lnet: Fix assert on empty group in selftest module

The core of the issue is that the selftest module doesn't sanitize its
own API, but it depends on lst utility to do such checks.  As a result
this issue manifests itself in this particular LU through an assert
on an empty group.  If the NID is misspelled then an empty group is
added.  An error output is provided, but if that's never checked in a
batch script, as is the case with this issue, then the script will try
to add an empty group to a test to run in a batch, and that will cause
an assert

The fix is two fold.  Ensure that lst utility checks that a group is
added with at least one node.  If not the group is subsequently
deleted.  And the add_test command would fail, since the group no
longer exists.

The second fix is to ensure that the kernel module itself sanitizes
its own API in this particular case, so that if a different utility is
used other than lst to communicate with the selftest kernel module
then this error would be caught.  This fix looks up the batch and the
groups, src and dst, in the ioctl handle and sanitizes that input at
this point.  If the group looked up either doesn't exist or doesn't
have at least one ACTIVE node, then the command fails.

NOTE:there are many other cases in the code where the selftest kernel
module doesn't check for sanity of the input, but depends totally on
the lst module to do such checks.  Particularly around length of
strings passed in.  Thus it is possible to crash the selftest module
if someone tries to create another userspace app to communicate with
the selftest kernel module without ensuring sanity of the params sent
to the kernel module.  In effect, it's always assumed that lst is the
front end for selftest and no other front end is to be used.

Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3093
Lustre-change: http://review.whamcloud.com/6092Signed-off-by: default avatarAmir Shehata <amir.shehata@intel.com>
Reviewed-by: default avatarIsaac Huang <he.huang@intel.com>
Reviewed-by: default avatarLiang Zhen <liang.zhen@intel.com>
Reviewed-by: default avatarOleg Drokin <oleg.drokin@intel.com>
[coding style fix of the original patch is splitted into a new one -- PengTao]
Signed-off-by: default avatarPeng Tao <bergwolf@gmail.com>
Signed-off-by: default avatarAndreas Dilger <andreas.dilger@intel.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 6d95e048
......@@ -1237,41 +1237,77 @@ lstcon_test_nodes_add(lstcon_test_t *test, struct list_head *result_up)
goto again;
}
int
lstcon_test_add(char *name, int type, int loop, int concur,
int dist, int span, char *src_name, char * dst_name,
void *param, int paramlen, int *retp,
struct list_head *result_up)
static int
lstcon_verify_batch(const char *name, lstcon_batch_t **batch)
{
lstcon_group_t *src_grp = NULL;
lstcon_group_t *dst_grp = NULL;
lstcon_test_t *test = NULL;
lstcon_batch_t *batch;
int rc;
int rc;
rc = lstcon_batch_find(name, &batch);
rc = lstcon_batch_find(name, batch);
if (rc != 0) {
CDEBUG(D_NET, "Can't find batch %s\n", name);
return rc;
}
if (batch->bat_state != LST_BATCH_IDLE) {
if ((*batch)->bat_state != LST_BATCH_IDLE) {
CDEBUG(D_NET, "Can't change running batch %s\n", name);
return rc;
return -EINVAL;
}
rc = lstcon_group_find(src_name, &src_grp);
return 0;
}
static int
lstcon_verify_group(const char *name, lstcon_group_t **grp)
{
int rc;
lstcon_ndlink_t *ndl;
rc = lstcon_group_find(name, grp);
if (rc != 0) {
CDEBUG(D_NET, "Can't find group %s\n", src_name);
goto out;
CDEBUG(D_NET, "can't find group %s\n", name);
return rc;
}
rc = lstcon_group_find(dst_name, &dst_grp);
if (rc != 0) {
CDEBUG(D_NET, "Can't find group %s\n", dst_name);
goto out;
list_for_each_entry(ndl, &(*grp)->grp_ndl_list, ndl_link) {
if (ndl->ndl_node->nd_state == LST_NODE_ACTIVE)
return 0;
}
CDEBUG(D_NET, "Group %s has no ACTIVE nodes\n", name);
return -EINVAL;
}
int
lstcon_test_add(char *batch_name, int type, int loop,
int concur, int dist, int span,
char *src_name, char *dst_name,
void *param, int paramlen, int *retp,
struct list_head *result_up)
{
lstcon_test_t *test = NULL;
int rc;
lstcon_group_t *src_grp = NULL;
lstcon_group_t *dst_grp = NULL;
lstcon_batch_t *batch = NULL;
/*
* verify that a batch of the given name exists, and the groups
* that will be part of the batch exist and have at least one
* active node
*/
rc = lstcon_verify_batch(batch_name, &batch);
if (rc != 0)
goto out;
rc = lstcon_verify_group(src_name, &src_grp);
if (rc != 0)
goto out;
rc = lstcon_verify_group(dst_name, &dst_grp);
if (rc != 0)
goto out;
if (dst_grp->grp_userland)
*retp = 1;
......@@ -1310,7 +1346,8 @@ lstcon_test_add(char *name, int type, int loop, int concur,
if (lstcon_trans_stat()->trs_rpc_errno != 0 ||
lstcon_trans_stat()->trs_fwk_errno != 0)
CDEBUG(D_NET, "Failed to add test %d to batch %s\n", type, name);
CDEBUG(D_NET, "Failed to add test %d to batch %s\n", type,
batch_name);
/* add to test list anyway, so user can check what's going on */
list_add_tail(&test->tes_link, &batch->bat_test_list);
......
......@@ -224,9 +224,9 @@ extern int lstcon_group_stat(char *grp_name, int timeout,
struct list_head *result_up);
extern int lstcon_nodes_stat(int count, lnet_process_id_t *ids_up,
int timeout, struct list_head *result_up);
extern int lstcon_test_add(char *name, int type, int loop, int concur,
int dist, int span, char *src_name, char * dst_name,
extern int lstcon_test_add(char *batch_name, int type, int loop,
int concur, int dist, int span,
char *src_name, char *dst_name,
void *param, int paramlen, int *retp,
struct list_head *result_up);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment