Commit 2d58de78 authored by Li Wei's avatar Li Wei Committed by Greg Kroah-Hartman

staging/lustre/ptlrpc: Translate between host and network errnos

Lustre puts system errors (e.g., ENOTCONN) on wire as numbers
essentially specific to senders' architectures.  While this is fine
for x86-only sites, where receivers share the same error number
definition with senders, problems will arise, however, for sites
involving multiple architectures with different error number
definitions.  For instance, an ENOTCONN reply from a sparc server will
be put on wire as -57, which, for an x86 client, means EBADSLT
instead.

To solve the problem, this patch defines a set of network errors for
on-wire or on-disk uses.  These errors correspond to a subset of the
x86 system errors and share the same number definition, maintaining
compatibility with existing x86 clients and servers.

Then, either error numbers could be translated at run time, or all
host errors going on wire could be replaced with network errors in the
code.  This patch does the former by introducing both generic and
field-specific translation routines and calling them at proper places,
so that translations for existing fields are transparent.
(Personally, I tend to think the latter way might be worthwhile, as it
is more straightforward conceptually.  Do we really need so many
different errors?  Should errors returned by kernel routines really be
passed up and eventually put on wire?  There could even be security
implications in that.)

Thank Fujitsu for the original idea and their contributions that make
this available upstream.

Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-2743
Lustre-change: http://review.whamcloud.com/5577Signed-off-by: default avatarLi Wei <wei.g.li@intel.com>
Reviewed-by: default avatarAndreas Dilger <andreas.dilger@intel.com>
Reviewed-by: default avatarHiroya Nozaki <nozaki.hiroya@jp.fujitsu.com>
Reviewed-by: default avatarOleg Drokin <oleg.drokin@intel.com>
Signed-off-by: default avatarPeng Tao <tao.peng@emc.com>
Signed-off-by: default avatarAndreas Dilger <andreas.dilger@intel.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 20802057
......@@ -48,3 +48,8 @@ config LUSTRE_DEBUG_EXPENSIVE_CHECK
expensive checks that may have a performance impact.
Use with caution. If unsure, say N.
config LUSTRE_TRANSLATE_ERRNOS
bool
depends on LUSTRE_FS && !X86
default true
This diff is collapsed.
......@@ -98,6 +98,8 @@
/* Defn's shared with user-space. */
#include <lustre/lustre_user.h>
#include <lustre/lustre_errno.h>
/*
* GENERAL STUFF
*/
......
......@@ -3166,6 +3166,38 @@ lustre_shrink_reply(struct ptlrpc_request *req, int segment,
req->rq_replen = lustre_shrink_msg(req->rq_repmsg, segment,
newlen, move_data);
}
#ifdef CONFIG_LUSTRE_TRANSLATE_ERRNOS
static inline int ptlrpc_status_hton(int h)
{
/*
* Positive errnos must be network errnos, such as LUSTRE_EDEADLK,
* ELDLM_LOCK_ABORTED, etc.
*/
if (h < 0)
return -lustre_errno_hton(-h);
else
return h;
}
static inline int ptlrpc_status_ntoh(int n)
{
/*
* See the comment in ptlrpc_status_hton().
*/
if (n < 0)
return -lustre_errno_ntoh(-n);
else
return n;
}
#else
#define ptlrpc_status_hton(h) (h)
#define ptlrpc_status_ntoh(n) (n)
#endif
/** @} */
/** Change request phase of \a req to \a new_phase */
......
......@@ -2208,7 +2208,8 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
/* I can't check the type of lock here because the bitlock of lock
* is not held here, so do the allocation blindly. -jay */
OBD_SLAB_ALLOC_PTR_GFP(node, ldlm_interval_slab, __GFP_IO);
if (node == NULL) /* Actually, this causes EDEADLOCK to be returned */
if (node == NULL)
/* Actually, this causes EDEADLOCK to be returned */
RETURN(NULL);
LASSERTF((new_mode == LCK_PW && lock->l_granted_mode == LCK_PR),
......
......@@ -555,6 +555,8 @@ static int ldlm_handle_qc_callback(struct ptlrpc_request *req)
RETURN(-EPROTO);
}
oqctl->qc_stat = ptlrpc_status_ntoh(oqctl->qc_stat);
cli->cl_qchk_stat = oqctl->qc_stat;
return 0;
}
......
......@@ -1023,7 +1023,7 @@ static int ldlm_cli_convert_local(struct ldlm_lock *lock, int new_mode,
ldlm_reprocess_all(res);
rc = 0;
} else {
rc = EDEADLOCK;
rc = LUSTRE_EDEADLK;
}
LDLM_DEBUG(lock, "client-side local convert handler END");
LDLM_LOCK_PUT(lock);
......@@ -1095,7 +1095,7 @@ int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, __u32 *flags)
GOTO(out, rc);
}
} else {
rc = EDEADLOCK;
rc = LUSTRE_EDEADLK;
}
EXIT;
out:
......@@ -1248,7 +1248,7 @@ int ldlm_cli_cancel_req(struct obd_export *exp, struct list_head *cancels,
} else {
rc = ptlrpc_queue_wait(req);
}
if (rc == ESTALE) {
if (rc == LUSTRE_ESTALE) {
CDEBUG(D_DLMTRACE, "client/server (nid %s) "
"out of sync -- not fatal\n",
libcfs_nid2str(req->rq_import->
......
......@@ -840,6 +840,9 @@ int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
lockrep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
LASSERT(lockrep != NULL);
lockrep->lock_policy_res2 =
ptlrpc_status_ntoh(lockrep->lock_policy_res2);
/* Retry the create infinitely when we get -EINPROGRESS from
* server. This is required by the new quota design. */
if (it && it->it_op & IT_CREAT &&
......@@ -1139,6 +1142,7 @@ static int mdc_intent_getattr_async_interpret(const struct lu_env *env,
struct lookup_intent *it;
struct lustre_handle *lockh;
struct obd_device *obddev;
struct ldlm_reply *lockrep;
__u64 flags = LDLM_FL_HAS_INTENT;
ENTRY;
......@@ -1159,6 +1163,12 @@ static int mdc_intent_getattr_async_interpret(const struct lu_env *env,
GOTO(out, rc);
}
lockrep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
LASSERT(lockrep != NULL);
lockrep->lock_policy_res2 =
ptlrpc_status_ntoh(lockrep->lock_policy_res2);
rc = mdc_finish_enqueue(exp, req, einfo, it, lockh, rc);
if (rc)
GOTO(out, rc);
......
......@@ -1189,6 +1189,7 @@ static int mdc_ioc_hsm_progress(struct obd_export *exp,
GOTO(out, rc = -EPROTO);
*req_hpk = *hpk;
req_hpk->hpk_errval = lustre_errno_hton(hpk->hpk_errval);
ptlrpc_request_set_replen(req);
......
......@@ -2347,6 +2347,8 @@ static int osc_enqueue_fini(struct ptlrpc_request *req, struct ost_lvb *lvb,
&RMF_DLM_REP);
LASSERT(rep != NULL);
rep->lock_policy_res1 =
ptlrpc_status_ntoh(rep->lock_policy_res1);
if (rep->lock_policy_res1)
rc = rep->lock_policy_res1;
}
......
......@@ -16,6 +16,7 @@ ptlrpc_objs += sec.o sec_bulk.o sec_gc.o sec_config.o sec_lproc.o
ptlrpc_objs += sec_null.o sec_plain.o nrs.o nrs_fifo.o
ptlrpc-y := $(ldlm_objs) $(ptlrpc_objs)
ptlrpc-$(CONFIG_LUSTRE_TRANSLATE_ERRNOS) += errno.c
obj-$(CONFIG_PTLRPC_GSS) += gss/
......
This diff is collapsed.
......@@ -400,7 +400,8 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
req->rq_type = PTL_RPC_MSG_REPLY;
lustre_msg_set_type(req->rq_repmsg, req->rq_type);
lustre_msg_set_status(req->rq_repmsg, req->rq_status);
lustre_msg_set_status(req->rq_repmsg,
ptlrpc_status_hton(req->rq_status));
lustre_msg_set_opc(req->rq_repmsg,
req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0);
......
......@@ -642,6 +642,9 @@ static inline int lustre_unpack_ptlrpc_body_v2(struct ptlrpc_request *req,
return -EINVAL;
}
if (!inout)
pb->pb_status = ptlrpc_status_ntoh(pb->pb_status);
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment