Commit 107e0008, authored Oct 08, 2008 by J. Bruce Fields

    Merge branch 'from-tomtucker' into for-2.6.28

Parents: 29373913 67080c82

Showing 4 changed files with 710 additions and 123 deletions (+710 -123)
 include/linux/sunrpc/svc_rdma.h             +26    -1
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c    +166   -21
 net/sunrpc/xprtrdma/svc_rdma_sendto.c      +215   -40
 net/sunrpc/xprtrdma/svc_rdma_transport.c   +303   -61
include/linux/sunrpc/svc_rdma.h
@@ -72,6 +72,7 @@ extern atomic_t rdma_stat_sq_prod;
  */
 struct svc_rdma_op_ctxt {
     struct svc_rdma_op_ctxt *read_hdr;
+    struct svc_rdma_fastreg_mr *frmr;
     int hdr_count;
     struct xdr_buf arg;
     struct list_head dto_q;
@@ -103,16 +104,30 @@ struct svc_rdma_chunk_sge {
     int start;          /* sge no for this chunk */
     int count;          /* sge count for this chunk */
 };
+struct svc_rdma_fastreg_mr {
+    struct ib_mr *mr;
+    void *kva;
+    struct ib_fast_reg_page_list *page_list;
+    int page_list_len;
+    unsigned long access_flags;
+    unsigned long map_len;
+    enum dma_data_direction direction;
+    struct list_head frmr_list;
+};
 struct svc_rdma_req_map {
+    struct svc_rdma_fastreg_mr *frmr;
     unsigned long count;
     union {
         struct kvec sge[RPCSVC_MAXPAGES];
         struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
     };
 };
+#define RDMACTXT_F_FAST_UNREG       1
 #define RDMACTXT_F_LAST_CTXT        2

+#define SVCRDMA_DEVCAP_FAST_REG     1   /* fast mr registration */
+#define SVCRDMA_DEVCAP_READ_W_INV   2   /* read w/ invalidate */
+
 struct svcxprt_rdma {
     struct svc_xprt      sc_xprt;       /* SVC transport structure */
     struct rdma_cm_id    *sc_cm_id;     /* RDMA connection id */
@@ -136,6 +151,11 @@ struct svcxprt_rdma {
     struct ib_cq         *sc_rq_cq;
     struct ib_cq         *sc_sq_cq;
     struct ib_mr         *sc_phys_mr;   /* MR for server memory */
+    u32                  sc_dev_caps;   /* distilled device caps */
+    u32                  sc_dma_lkey;   /* local dma key */
+    unsigned int         sc_frmr_pg_list_len;
+    struct list_head     sc_frmr_q;
+    spinlock_t           sc_frmr_q_lock;

     spinlock_t           sc_lock;       /* transport lock */
@@ -192,8 +212,13 @@ extern int svc_rdma_post_recv(struct svcxprt_rdma *);
 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
+extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
 extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
 extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
+extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *);
+extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
+extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *);
 extern void svc_sq_reap(struct svcxprt_rdma *);
 extern void svc_rq_reap(struct svcxprt_rdma *);
 extern struct svc_xprt_class svc_rdma_class;
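The prototypes above define the fast-registration (FRMR) lifecycle that the rest of this merge builds on: take an MR from the transport's pool, describe the pages it should cover, post the registration, and return it when the last work request that used it completes. A minimal sketch of that calling sequence follows; it is illustrative only (the helper name is hypothetical) and simply mirrors how the receive and send paths below use these functions.

/* Illustrative sketch, not part of the patch: fast register one receive page. */
static int example_fastreg_one_page(struct svcxprt_rdma *xprt, struct page *page)
{
    struct svc_rdma_fastreg_mr *frmr;
    int ret;

    frmr = svc_rdma_get_frmr(xprt);         /* pop from sc_frmr_q or allocate */
    if (IS_ERR(frmr))
        return -ENOMEM;

    frmr->kva = page_address(page);
    frmr->direction = DMA_FROM_DEVICE;
    frmr->access_flags = IB_ACCESS_LOCAL_WRITE;
    frmr->map_len = PAGE_SIZE;
    frmr->page_list_len = 1;
    frmr->page_list->page_list[0] =
        ib_dma_map_single(xprt->sc_cm_id->device, page_address(page),
                          PAGE_SIZE, DMA_FROM_DEVICE);
    if (ib_dma_mapping_error(xprt->sc_cm_id->device,
                             frmr->page_list->page_list[0])) {
        svc_rdma_put_frmr(xprt, frmr);
        return -EIO;
    }
    atomic_inc(&xprt->sc_dma_used);

    /* Posts an IB_WR_FAST_REG_MR work request (see svc_rdma_fastreg() below) */
    ret = svc_rdma_fastreg(xprt, frmr);
    if (ret) {
        svc_rdma_put_frmr(xprt, frmr);      /* unmaps pages and requeues the MR */
        return ret;
    }
    /* ...use frmr->mr->lkey in sges, then svc_rdma_put_frmr() when done... */
    return 0;
}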
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -116,7 +116,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
  *
  * Assumptions:
  * - chunk[0]->position points to pages[0] at an offset of 0
- * - pages[] is not physically or virtually contigous and consists of
+ * - pages[] is not physically or virtually contiguous and consists of
  *   PAGE_SIZE elements.
  *
  * Output:
@@ -125,7 +125,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
  * chunk in the read list
  *
  */
-static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
+static int map_read_chunks(struct svcxprt_rdma *xprt,
                            struct svc_rqst *rqstp,
                            struct svc_rdma_op_ctxt *head,
                            struct rpcrdma_msg *rmsgp,
@@ -211,26 +211,128 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
     return sge_no;
 }

-static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
-                              struct svc_rdma_op_ctxt *ctxt,
-                              struct kvec *vec,
-                              u64 *sgl_offset,
-                              int count)
+/* Map a read-chunk-list to an XDR and fast register the page-list.
+ *
+ * Assumptions:
+ * - chunk[0] position points to pages[0] at an offset of 0
+ * - pages[] will be made physically contiguous by creating a one-off memory
+ *   region using the fastreg verb.
+ * - byte_count is # of bytes in read-chunk-list
+ * - ch_count is # of chunks in read-chunk-list
+ *
+ * Output:
+ * - sge array pointing into pages[] array.
+ * - chunk_sge array specifying sge index and count for each
+ *   chunk in the read list
+ */
+static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
+                                struct svc_rqst *rqstp,
+                                struct svc_rdma_op_ctxt *head,
+                                struct rpcrdma_msg *rmsgp,
+                                struct svc_rdma_req_map *rpl_map,
+                                struct svc_rdma_req_map *chl_map,
+                                int ch_count,
+                                int byte_count)
+{
+    int page_no;
+    int ch_no;
+    u32 offset;
+    struct rpcrdma_read_chunk *ch;
+    struct svc_rdma_fastreg_mr *frmr;
+    int ret = 0;
+
+    frmr = svc_rdma_get_frmr(xprt);
+    if (IS_ERR(frmr))
+        return -ENOMEM;
+
+    head->frmr = frmr;
+    head->arg.head[0] = rqstp->rq_arg.head[0];
+    head->arg.tail[0] = rqstp->rq_arg.tail[0];
+    head->arg.pages = &head->pages[head->count];
+    head->hdr_count = head->count; /* save count of hdr pages */
+    head->arg.page_base = 0;
+    head->arg.page_len = byte_count;
+    head->arg.len = rqstp->rq_arg.len + byte_count;
+    head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
+
+    /* Fast register the page list */
+    frmr->kva = page_address(rqstp->rq_arg.pages[0]);
+    frmr->direction = DMA_FROM_DEVICE;
+    frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
+    frmr->map_len = byte_count;
+    frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
+    for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
+        frmr->page_list->page_list[page_no] =
+            ib_dma_map_single(xprt->sc_cm_id->device,
+                              page_address(rqstp->rq_arg.pages[page_no]),
+                              PAGE_SIZE, DMA_TO_DEVICE);
+        if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+                                 frmr->page_list->page_list[page_no]))
+            goto fatal_err;
+        atomic_inc(&xprt->sc_dma_used);
+        head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
+    }
+    head->count += page_no;
+
+    /* rq_respages points one past arg pages */
+    rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
+
+    /* Create the reply and chunk maps */
+    offset = 0;
+    ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+    for (ch_no = 0; ch_no < ch_count; ch_no++) {
+        rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
+        rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length;
+        chl_map->ch[ch_no].count = 1;
+        chl_map->ch[ch_no].start = ch_no;
+        offset += ch->rc_target.rs_length;
+        ch++;
+    }
+
+    ret = svc_rdma_fastreg(xprt, frmr);
+    if (ret)
+        goto fatal_err;
+
+    return ch_no;
+
+ fatal_err:
+    printk("svcrdma: error fast registering xdr for xprt %p", xprt);
+    svc_rdma_put_frmr(xprt, frmr);
+    return -EIO;
+}
+
+static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
+                             struct svc_rdma_op_ctxt *ctxt,
+                             struct svc_rdma_fastreg_mr *frmr,
+                             struct kvec *vec,
+                             u64 *sgl_offset,
+                             int count)
 {
     int i;

     ctxt->count = count;
     ctxt->direction = DMA_FROM_DEVICE;
     for (i = 0; i < count; i++) {
-        atomic_inc(&xprt->sc_dma_used);
-        ctxt->sge[i].addr =
-            ib_dma_map_single(xprt->sc_cm_id->device,
-                              vec[i].iov_base, vec[i].iov_len,
-                              DMA_FROM_DEVICE);
+        ctxt->sge[i].length = 0; /* in case map fails */
+        if (!frmr) {
+            ctxt->sge[i].addr =
+                ib_dma_map_single(xprt->sc_cm_id->device,
+                                  vec[i].iov_base, vec[i].iov_len,
+                                  DMA_FROM_DEVICE);
+            if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+                                     ctxt->sge[i].addr))
+                return -EINVAL;
+            ctxt->sge[i].lkey = xprt->sc_dma_lkey;
+            atomic_inc(&xprt->sc_dma_used);
+        } else {
+            ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
+            ctxt->sge[i].lkey = frmr->mr->lkey;
+        }
         ctxt->sge[i].length = vec[i].iov_len;
-        ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
         *sgl_offset = *sgl_offset + vec[i].iov_len;
     }
+    return 0;
 }

 static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
@@ -278,6 +380,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
                          struct svc_rdma_op_ctxt *hdr_ctxt)
 {
     struct ib_send_wr read_wr;
+    struct ib_send_wr inv_wr;
     int err = 0;
     int ch_no;
     int ch_count;
@@ -301,9 +404,20 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
     svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
     if (ch_count > RPCSVC_MAXPAGES)
         return -EINVAL;
-    sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
-                                rpl_map, chl_map,
-                                ch_count, byte_count);
+
+    if (!xprt->sc_frmr_pg_list_len)
+        sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
+                                    rpl_map, chl_map, ch_count,
+                                    byte_count);
+    else
+        sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
+                                         rpl_map, chl_map, ch_count,
+                                         byte_count);
+    if (sge_count < 0) {
+        err = -EIO;
+        goto out;
+    }
     sgl_offset = 0;
     ch_no = 0;
@@ -312,13 +426,16 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 next_sge:
     ctxt = svc_rdma_get_context(xprt);
     ctxt->direction = DMA_FROM_DEVICE;
+    ctxt->frmr = hdr_ctxt->frmr;
     ctxt->read_hdr = NULL;
     clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+    clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);

     /* Prepare READ WR */
     memset(&read_wr, 0, sizeof read_wr);
-    ctxt->wr_op = IB_WR_RDMA_READ;
     read_wr.wr_id = (unsigned long)ctxt;
     read_wr.opcode = IB_WR_RDMA_READ;
+    ctxt->wr_op = read_wr.opcode;
     read_wr.send_flags = IB_SEND_SIGNALED;
     read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
     read_wr.wr.rdma.remote_addr =
@@ -327,10 +444,15 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
     read_wr.sg_list = ctxt->sge;
     read_wr.num_sge =
         rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
-    rdma_set_ctxt_sge(xprt, ctxt,
-                      &rpl_map->sge[chl_map->ch[ch_no].start],
-                      &sgl_offset,
-                      read_wr.num_sge);
+    err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
+                            &rpl_map->sge[chl_map->ch[ch_no].start],
+                            &sgl_offset,
+                            read_wr.num_sge);
+    if (err) {
+        svc_rdma_unmap_dma(ctxt);
+        svc_rdma_put_context(ctxt, 0);
+        goto out;
+    }
     if (((ch+1)->rc_discrim == 0) &&
         (read_wr.num_sge == chl_map->ch[ch_no].count)) {
         /*
@@ -339,6 +461,29 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
          * the client and the RPC needs to be enqueued.
          */
         set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+        if (hdr_ctxt->frmr) {
+            set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+            /*
+             * Invalidate the local MR used to map the data
+             * sink.
+             */
+            if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
+                read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+                ctxt->wr_op = read_wr.opcode;
+                read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
+            } else {
+                /* Prepare INVALIDATE WR */
+                memset(&inv_wr, 0, sizeof inv_wr);
+                inv_wr.opcode = IB_WR_LOCAL_INV;
+                inv_wr.send_flags = IB_SEND_SIGNALED;
+                inv_wr.ex.invalidate_rkey = hdr_ctxt->frmr->mr->lkey;
+                read_wr.next = &inv_wr;
+            }
+        }
         ctxt->read_hdr = hdr_ctxt;
     }
     /* Post the read */
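One detail worth calling out from the hunk above: when the device cannot combine the read with an invalidate (no SVCRDMA_DEVCAP_READ_W_INV), a separate IB_WR_LOCAL_INV work request is simply linked to the READ through read_wr.next, and the reworked svc_rdma_send() in svc_rdma_transport.c (below) counts and posts the whole chain. A condensed, illustrative restatement of that pattern (local variable names are hypothetical, not patch code):

    struct ib_send_wr read_wr, inv_wr;

    memset(&read_wr, 0, sizeof read_wr);
    read_wr.opcode = IB_WR_RDMA_READ;
    read_wr.send_flags = IB_SEND_SIGNALED;
    /* rkey, remote_addr, sg_list and num_sge are filled in as in the patch */

    memset(&inv_wr, 0, sizeof inv_wr);
    inv_wr.opcode = IB_WR_LOCAL_INV;
    inv_wr.send_flags = IB_SEND_SIGNALED;
    inv_wr.ex.invalidate_rkey = frmr->mr->lkey;

    read_wr.next = &inv_wr;   /* both WRs consume SQ slots and transport refs */
    err = svc_rdma_send(xprt, &read_wr);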
net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -69,9 +69,127 @@
  * array is only concerned with the reply we are assured that we have
  * on extra page for the RPCRMDA header.
  */
-static void xdr_to_sge(struct svcxprt_rdma *xprt,
-                       struct xdr_buf *xdr,
-                       struct svc_rdma_req_map *vec)
+int fast_reg_xdr(struct svcxprt_rdma *xprt,
+                 struct xdr_buf *xdr,
+                 struct svc_rdma_req_map *vec)
+{
+    int sge_no;
+    u32 sge_bytes;
+    u32 page_bytes;
+    u32 page_off;
+    int page_no = 0;
+    u8 *frva;
+    struct svc_rdma_fastreg_mr *frmr;
+
+    frmr = svc_rdma_get_frmr(xprt);
+    if (IS_ERR(frmr))
+        return -ENOMEM;
+    vec->frmr = frmr;
+
+    /* Skip the RPCRDMA header */
+    sge_no = 1;
+
+    /* Map the head. */
+    frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
+    vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
+    vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
+    vec->count = 2;
+    sge_no++;
+
+    /* Build the FRMR */
+    frmr->kva = frva;
+    frmr->direction = DMA_TO_DEVICE;
+    frmr->access_flags = 0;
+    frmr->map_len = PAGE_SIZE;
+    frmr->page_list_len = 1;
+    frmr->page_list->page_list[page_no] =
+        ib_dma_map_single(xprt->sc_cm_id->device,
+                          (void *)xdr->head[0].iov_base,
+                          PAGE_SIZE, DMA_TO_DEVICE);
+    if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+                             frmr->page_list->page_list[page_no]))
+        goto fatal_err;
+    atomic_inc(&xprt->sc_dma_used);
+
+    page_off = xdr->page_base;
+    page_bytes = xdr->page_len + page_off;
+    if (!page_bytes)
+        goto encode_tail;
+
+    /* Map the pages */
+    vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
+    vec->sge[sge_no].iov_len = page_bytes;
+    sge_no++;
+    while (page_bytes) {
+        struct page *page;
+
+        page = xdr->pages[page_no++];
+        sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
+        page_bytes -= sge_bytes;
+
+        frmr->page_list->page_list[page_no] =
+            ib_dma_map_page(xprt->sc_cm_id->device, page, 0,
+                            PAGE_SIZE, DMA_TO_DEVICE);
+        if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+                                 frmr->page_list->page_list[page_no]))
+            goto fatal_err;
+
+        atomic_inc(&xprt->sc_dma_used);
+        page_off = 0; /* reset for next time through loop */
+        frmr->map_len += PAGE_SIZE;
+        frmr->page_list_len++;
+    }
+    vec->count++;
+
+ encode_tail:
+    /* Map tail */
+    if (0 == xdr->tail[0].iov_len)
+        goto done;
+
+    vec->count++;
+    vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
+
+    if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
+        ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
+        /*
+         * If head and tail use the same page, we don't need
+         * to map it again.
+         */
+        vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
+    } else {
+        void *va;
+
+        /* Map another page for the tail */
+        page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
+        va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
+        vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
+
+        frmr->page_list->page_list[page_no] =
+            ib_dma_map_single(xprt->sc_cm_id->device, va,
+                              PAGE_SIZE, DMA_TO_DEVICE);
+        if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+                                 frmr->page_list->page_list[page_no]))
+            goto fatal_err;
+        atomic_inc(&xprt->sc_dma_used);
+        frmr->map_len += PAGE_SIZE;
+        frmr->page_list_len++;
+    }
+
+ done:
+    if (svc_rdma_fastreg(xprt, frmr))
+        goto fatal_err;
+
+    return 0;
+
+ fatal_err:
+    printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
+    svc_rdma_put_frmr(xprt, frmr);
+    return -EIO;
+}
+
+static int map_xdr(struct svcxprt_rdma *xprt,
+                   struct xdr_buf *xdr,
+                   struct svc_rdma_req_map *vec)
 {
     int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
     int sge_no;
@@ -83,6 +201,9 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
     BUG_ON(xdr->len !=
            (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));

+    if (xprt->sc_frmr_pg_list_len)
+        return fast_reg_xdr(xprt, xdr, vec);
+
     /* Skip the first sge, this is for the RPCRDMA header */
     sge_no = 1;
@@ -116,9 +237,12 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
     BUG_ON(sge_no > sge_max);
     vec->count = sge_no;
+    return 0;
 }

 /* Assumptions:
+ * - We are using FRMR
+ *     - or -
  * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
  */
 static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
@@ -158,30 +282,35 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
     sge_no = 0;

     /* Copy the remaining SGE */
-    while (bc != 0 && xdr_sge_no < vec->count) {
-        sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
-        sge_bytes = min((size_t)bc,
-                        (size_t)(vec->sge[xdr_sge_no].iov_len - sge_off));
+    while (bc != 0) {
+        sge_bytes = min_t(size_t,
+                          bc, vec->sge[xdr_sge_no].iov_len - sge_off);
         sge[sge_no].length = sge_bytes;
-        atomic_inc(&xprt->sc_dma_used);
-        sge[sge_no].addr =
-            ib_dma_map_single(xprt->sc_cm_id->device,
-                              (void *)vec->sge[xdr_sge_no].iov_base + sge_off,
-                              sge_bytes, DMA_TO_DEVICE);
-        if (dma_mapping_error(xprt->sc_cm_id->device->dma_device,
-                              sge[sge_no].addr))
-            goto err;
+        if (!vec->frmr) {
+            sge[sge_no].addr =
+                ib_dma_map_single(xprt->sc_cm_id->device,
+                                  (void *)vec->sge[xdr_sge_no].iov_base + sge_off,
+                                  sge_bytes, DMA_TO_DEVICE);
+            if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+                                     sge[sge_no].addr))
+                goto err;
+            atomic_inc(&xprt->sc_dma_used);
+            sge[sge_no].lkey = xprt->sc_dma_lkey;
+        } else {
+            sge[sge_no].addr = (unsigned long)
+                vec->sge[xdr_sge_no].iov_base + sge_off;
+            sge[sge_no].lkey = vec->frmr->mr->lkey;
+        }
+        ctxt->count++;
+        ctxt->frmr = vec->frmr;
         sge_off = 0;
         sge_no++;
-        ctxt->count++;
         xdr_sge_no++;
+        BUG_ON(xdr_sge_no > vec->count);
         bc -= sge_bytes;
     }

     BUG_ON(bc != 0);
-    BUG_ON(xdr_sge_no > vec->count);

     /* Prepare WRITE WR */
     memset(&write_wr, 0, sizeof write_wr);
     ctxt->wr_op = IB_WR_RDMA_WRITE;
@@ -226,7 +355,10 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
     res_ary = (struct rpcrdma_write_array *)
         &rdma_resp->rm_body.rm_chunks[1];

-    max_write = xprt->sc_max_sge * PAGE_SIZE;
+    if (vec->frmr)
+        max_write = vec->frmr->map_len;
+    else
+        max_write = xprt->sc_max_sge * PAGE_SIZE;

     /* Write chunks start at the pagelist */
     for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
@@ -297,7 +429,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
     res_ary = (struct rpcrdma_write_array *)
         &rdma_resp->rm_body.rm_chunks[2];

-    max_write = xprt->sc_max_sge * PAGE_SIZE;
+    if (vec->frmr)
+        max_write = vec->frmr->map_len;
+    else
+        max_write = xprt->sc_max_sge * PAGE_SIZE;

     /* xdr offset starts at RPC message */
     for (xdr_off = 0, chunk_no = 0;
@@ -307,7 +442,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
         ch = &arg_ary->wc_array[chunk_no].wc_target;
         write_len = min(xfer_len, ch->rs_length);

         /* Prepare the reply chunk given the length actually
          * written */
         rs_offset = get_unaligned(&(ch->rs_offset));
@@ -366,6 +500,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
               int byte_count)
 {
     struct ib_send_wr send_wr;
+    struct ib_send_wr inv_wr;
     int sge_no;
     int sge_bytes;
     int page_no;
@@ -385,27 +520,45 @@ static int send_reply(struct svcxprt_rdma *rdma,
     /* Prepare the context */
     ctxt->pages[0] = page;
     ctxt->count = 1;
+    ctxt->frmr = vec->frmr;
+    if (vec->frmr)
+        set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+    else
+        clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);

     /* Prepare the SGE for the RPCRDMA Header */
-    atomic_inc(&rdma->sc_dma_used);
     ctxt->sge[0].addr =
         ib_dma_map_page(rdma->sc_cm_id->device,
                         page, 0, PAGE_SIZE, DMA_TO_DEVICE);
+    if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
+        goto err;
+    atomic_inc(&rdma->sc_dma_used);
+
     ctxt->direction = DMA_TO_DEVICE;
+
     ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
-    ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
+    ctxt->sge[0].lkey = rdma->sc_dma_lkey;

     /* Determine how many of our SGE are to be transmitted */
     for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
         sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
         byte_count -= sge_bytes;
-        atomic_inc(&rdma->sc_dma_used);
-        ctxt->sge[sge_no].addr =
-            ib_dma_map_single(rdma->sc_cm_id->device,
-                              vec->sge[sge_no].iov_base,
-                              sge_bytes, DMA_TO_DEVICE);
+        if (!vec->frmr) {
+            ctxt->sge[sge_no].addr =
+                ib_dma_map_single(rdma->sc_cm_id->device,
+                                  vec->sge[sge_no].iov_base,
+                                  sge_bytes, DMA_TO_DEVICE);
+            if (ib_dma_mapping_error(rdma->sc_cm_id->device,
+                                     ctxt->sge[sge_no].addr))
+                goto err;
+            atomic_inc(&rdma->sc_dma_used);
+            ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
+        } else {
+            ctxt->sge[sge_no].addr = (unsigned long)
+                vec->sge[sge_no].iov_base;
+            ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
+        }
         ctxt->sge[sge_no].length = sge_bytes;
-        ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
     }
     BUG_ON(byte_count != 0);
@@ -417,11 +570,16 @@ static int send_reply(struct svcxprt_rdma *rdma,
         ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
         ctxt->count++;
         rqstp->rq_respages[page_no] = NULL;
-        /* If there are more pages than SGE, terminate SGE list */
+        /*
+         * If there are more pages than SGE, terminate SGE
+         * list so that svc_rdma_unmap_dma doesn't attempt to
+         * unmap garbage.
+         */
         if (page_no+1 >= sge_no)
             ctxt->sge[page_no+1].length = 0;
     }
     BUG_ON(sge_no > rdma->sc_max_sge);
+    BUG_ON(sge_no > ctxt->count);
     memset(&send_wr, 0, sizeof send_wr);
     ctxt->wr_op = IB_WR_SEND;
     send_wr.wr_id = (unsigned long)ctxt;
@@ -429,12 +587,26 @@ static int send_reply(struct svcxprt_rdma *rdma,
     send_wr.num_sge = sge_no;
     send_wr.opcode = IB_WR_SEND;
     send_wr.send_flags = IB_SEND_SIGNALED;
+    if (vec->frmr) {
+        /* Prepare INVALIDATE WR */
+        memset(&inv_wr, 0, sizeof inv_wr);
+        inv_wr.opcode = IB_WR_LOCAL_INV;
+        inv_wr.send_flags = IB_SEND_SIGNALED;
+        inv_wr.ex.invalidate_rkey = vec->frmr->mr->lkey;
+        send_wr.next = &inv_wr;
+    }

     ret = svc_rdma_send(rdma, &send_wr);
     if (ret)
-        svc_rdma_put_context(ctxt, 1);
+        goto err;

-    return ret;
+    return 0;
+
+ err:
+    svc_rdma_put_frmr(rdma, vec->frmr);
+    svc_rdma_put_context(ctxt, 1);
+    return -EIO;
 }

 void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
@@ -477,8 +649,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
     ctxt = svc_rdma_get_context(rdma);
     ctxt->direction = DMA_TO_DEVICE;
     vec = svc_rdma_get_req_map();
-    xdr_to_sge(rdma, &rqstp->rq_res, vec);
+    ret = map_xdr(rdma, &rqstp->rq_res, vec);
+    if (ret)
+        goto err0;
     inline_bytes = rqstp->rq_res.len;

     /* Create the RDMA response header */
@@ -498,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
     if (ret < 0) {
         printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
                ret);
-        goto error;
+        goto err1;
     }
     inline_bytes -= ret;
@@ -508,7 +681,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
     if (ret < 0) {
         printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
                ret);
-        goto error;
+        goto err1;
     }
     inline_bytes -= ret;
@@ -517,9 +690,11 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
     svc_rdma_put_req_map(vec);
     dprintk("svcrdma: send_reply returns %d\n", ret);
     return ret;
-
- error:
+
+ err1:
+    put_page(res_page);
+ err0:
     svc_rdma_put_req_map(vec);
     svc_rdma_put_context(ctxt, 0);
-    put_page(res_page);
     return ret;
 }
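The send-side changes follow the same pattern as the receive side: map_xdr() either DMA-maps each kvec individually (lkey = sc_dma_lkey) or, when the device advertises a fast-reg page list, calls fast_reg_xdr() so every sge can reuse the single frmr->mr->lkey. Beyond that, the WRITE path only has to decide how large each RDMA_WRITE may be. An illustrative restatement (not patch code; the helper name is hypothetical):

/* Illustrative only: the WRITE size cap used by send_write_chunks() and
 * send_reply_chunks() above. */
static inline u32 example_max_write(struct svcxprt_rdma *xprt,
                                    struct svc_rdma_req_map *vec)
{
    if (vec->frmr)
        return vec->frmr->map_len;          /* whole fast-registered reply */
    return xprt->sc_max_sge * PAGE_SIZE;    /* bounded by the device sge limit */
}

send_reply() then chains an IB_WR_LOCAL_INV behind the SEND when an FRMR was used, so the registration is torn down as soon as the reply has been posted.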
net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -100,20 +100,29 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
     ctxt->xprt = xprt;
     INIT_LIST_HEAD(&ctxt->dto_q);
     ctxt->count = 0;
+    ctxt->frmr = NULL;
     atomic_inc(&xprt->sc_ctxt_used);
     return ctxt;
 }

-static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
+void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 {
     struct svcxprt_rdma *xprt = ctxt->xprt;
     int i;
     for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
-        atomic_dec(&xprt->sc_dma_used);
-        ib_dma_unmap_single(xprt->sc_cm_id->device,
-                            ctxt->sge[i].addr,
-                            ctxt->sge[i].length,
-                            ctxt->direction);
+        /*
+         * Unmap the DMA addr in the SGE if the lkey matches
+         * the sc_dma_lkey, otherwise, ignore it since it is
+         * an FRMR lkey and will be unmapped later when the
+         * last WR that uses it completes.
+         */
+        if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
+            atomic_dec(&xprt->sc_dma_used);
+            ib_dma_unmap_single(xprt->sc_cm_id->device,
+                                ctxt->sge[i].addr,
+                                ctxt->sge[i].length,
+                                ctxt->direction);
+        }
     }
 }
@@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
         schedule_timeout_uninterruptible(msecs_to_jiffies(500));
     }
     map->count = 0;
+    map->frmr = NULL;
     return map;
 }
@@ -315,6 +325,50 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
         svc_xprt_enqueue(&xprt->sc_xprt);
 }

+/*
+ * Processs a completion context
+ */
+static void process_context(struct svcxprt_rdma *xprt,
+                            struct svc_rdma_op_ctxt *ctxt)
+{
+    svc_rdma_unmap_dma(ctxt);
+
+    switch (ctxt->wr_op) {
+    case IB_WR_SEND:
+        if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
+            svc_rdma_put_frmr(xprt, ctxt->frmr);
+        svc_rdma_put_context(ctxt, 1);
+        break;
+
+    case IB_WR_RDMA_WRITE:
+        svc_rdma_put_context(ctxt, 0);
+        break;
+
+    case IB_WR_RDMA_READ:
+    case IB_WR_RDMA_READ_WITH_INV:
+        if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
+            struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
+            BUG_ON(!read_hdr);
+            if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
+                svc_rdma_put_frmr(xprt, ctxt->frmr);
+            spin_lock_bh(&xprt->sc_rq_dto_lock);
+            set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+            list_add_tail(&read_hdr->dto_q,
+                          &xprt->sc_read_complete_q);
+            spin_unlock_bh(&xprt->sc_rq_dto_lock);
+            svc_xprt_enqueue(&xprt->sc_xprt);
+        }
+        svc_rdma_put_context(ctxt, 0);
+        break;
+
+    default:
+        printk(KERN_ERR "svcrdma: unexpected completion type, "
+               "opcode=%d\n",
+               ctxt->wr_op);
+        break;
+    }
+}
+
 /*
  * Send Queue Completion Handler - potentially called on interrupt context.
  *
@@ -327,17 +381,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
     struct ib_cq *cq = xprt->sc_sq_cq;
     int ret;

     if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
         return;

     ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
     atomic_inc(&rdma_stat_sq_poll);
     while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
-        ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
-        xprt = ctxt->xprt;
-
-        svc_rdma_unmap_dma(ctxt);
         if (wc.status != IB_WC_SUCCESS)
             /* Close the transport */
             set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -346,35 +395,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
         atomic_dec(&xprt->sc_sq_count);
         wake_up(&xprt->sc_send_wait);

-        switch (ctxt->wr_op) {
-        case IB_WR_SEND:
-            svc_rdma_put_context(ctxt, 1);
-            break;
-
-        case IB_WR_RDMA_WRITE:
-            svc_rdma_put_context(ctxt, 0);
-            break;
-
-        case IB_WR_RDMA_READ:
-            if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
-                struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
-                BUG_ON(!read_hdr);
-                spin_lock_bh(&xprt->sc_rq_dto_lock);
-                set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-                list_add_tail(&read_hdr->dto_q,
-                              &xprt->sc_read_complete_q);
-                spin_unlock_bh(&xprt->sc_rq_dto_lock);
-                svc_xprt_enqueue(&xprt->sc_xprt);
-            }
-            svc_rdma_put_context(ctxt, 0);
-            break;
-
-        default:
-            printk(KERN_ERR "svcrdma: unexpected completion type, "
-                   "opcode=%d, status=%d\n",
-                   wc.opcode, wc.status);
-            break;
-        }
+        ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
+        if (ctxt)
+            process_context(xprt, ctxt);
+
         svc_xprt_put(&xprt->sc_xprt);
     }
@@ -425,10 +449,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
     INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
     INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
     INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
+    INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
     init_waitqueue_head(&cma_xprt->sc_send_wait);

     spin_lock_init(&cma_xprt->sc_lock);
     spin_lock_init(&cma_xprt->sc_rq_dto_lock);
+    spin_lock_init(&cma_xprt->sc_frmr_q_lock);

     cma_xprt->sc_ord = svcrdma_ord;
@@ -462,7 +488,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
     struct ib_recv_wr recv_wr, *bad_recv_wr;
     struct svc_rdma_op_ctxt *ctxt;
     struct page *page;
-    unsigned long pa;
+    dma_addr_t pa;
     int sge_no;
     int buflen;
     int ret;
@@ -474,13 +500,15 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
         BUG_ON(sge_no >= xprt->sc_max_sge);
         page = svc_rdma_get_page();
         ctxt->pages[sge_no] = page;
-        atomic_inc(&xprt->sc_dma_used);
         pa = ib_dma_map_page(xprt->sc_cm_id->device,
                              page, 0, PAGE_SIZE,
                              DMA_FROM_DEVICE);
+        if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
+            goto err_put_ctxt;
+        atomic_inc(&xprt->sc_dma_used);
         ctxt->sge[sge_no].addr = pa;
         ctxt->sge[sge_no].length = PAGE_SIZE;
-        ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
+        ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
         buflen += PAGE_SIZE;
     }
     ctxt->count = sge_no;
@@ -496,6 +524,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
         svc_rdma_put_context(ctxt, 1);
     }
     return ret;
+
+ err_put_ctxt:
+    svc_rdma_put_context(ctxt, 1);
+    return -ENOMEM;
 }

 /*
@@ -566,7 +598,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
         dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
                 "event=%d\n", cma_id, cma_id->context, event->event);
         handle_connect_req(cma_id,
-                           event->param.conn.responder_resources);
+                           event->param.conn.initiator_depth);
         break;
     case RDMA_CM_EVENT_ESTABLISHED:
@@ -686,6 +718,97 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
     return ERR_PTR(ret);
 }

+static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
+{
+    struct ib_mr *mr;
+    struct ib_fast_reg_page_list *pl;
+    struct svc_rdma_fastreg_mr *frmr;
+
+    frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
+    if (!frmr)
+        goto err;
+
+    mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
+    if (!mr)
+        goto err_free_frmr;
+
+    pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
+                                     RPCSVC_MAXPAGES);
+    if (!pl)
+        goto err_free_mr;
+
+    frmr->mr = mr;
+    frmr->page_list = pl;
+    INIT_LIST_HEAD(&frmr->frmr_list);
+    return frmr;
+
+ err_free_mr:
+    ib_dereg_mr(mr);
+ err_free_frmr:
+    kfree(frmr);
+ err:
+    return ERR_PTR(-ENOMEM);
+}
+
+static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
+{
+    struct svc_rdma_fastreg_mr *frmr;
+
+    while (!list_empty(&xprt->sc_frmr_q)) {
+        frmr = list_entry(xprt->sc_frmr_q.next,
+                          struct svc_rdma_fastreg_mr, frmr_list);
+        list_del_init(&frmr->frmr_list);
+        ib_dereg_mr(frmr->mr);
+        ib_free_fast_reg_page_list(frmr->page_list);
+        kfree(frmr);
+    }
+}
+
+struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
+{
+    struct svc_rdma_fastreg_mr *frmr = NULL;
+
+    spin_lock_bh(&rdma->sc_frmr_q_lock);
+    if (!list_empty(&rdma->sc_frmr_q)) {
+        frmr = list_entry(rdma->sc_frmr_q.next,
+                          struct svc_rdma_fastreg_mr, frmr_list);
+        list_del_init(&frmr->frmr_list);
+        frmr->map_len = 0;
+        frmr->page_list_len = 0;
+    }
+    spin_unlock_bh(&rdma->sc_frmr_q_lock);
+    if (frmr)
+        return frmr;
+
+    return rdma_alloc_frmr(rdma);
+}
+
+static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
+                           struct svc_rdma_fastreg_mr *frmr)
+{
+    int page_no;
+    for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
+        dma_addr_t addr = frmr->page_list->page_list[page_no];
+        if (ib_dma_mapping_error(frmr->mr->device, addr))
+            continue;
+        atomic_dec(&xprt->sc_dma_used);
+        ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE,
+                            frmr->direction);
+    }
+}
+
+void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
+                       struct svc_rdma_fastreg_mr *frmr)
+{
+    if (frmr) {
+        frmr_unmap_dma(rdma, frmr);
+        spin_lock_bh(&rdma->sc_frmr_q_lock);
+        BUG_ON(!list_empty(&frmr->frmr_list));
+        list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
+        spin_unlock_bh(&rdma->sc_frmr_q_lock);
+    }
+}
+
 /*
  * This is the xpo_recvfrom function for listening endpoints. Its
  * purpose is to accept incoming connections. The CMA callback handler
@@ -704,6 +827,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
     struct rdma_conn_param conn_param;
     struct ib_qp_init_attr qp_attr;
     struct ib_device_attr devattr;
+    int dma_mr_acc;
+    int need_dma_mr;
     int ret;
     int i;
@@ -819,15 +944,77 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
     }
     newxprt->sc_qp = newxprt->sc_cm_id->qp;

-    /* Register all of physical memory */
-    newxprt->sc_phys_mr =
-        ib_get_dma_mr(newxprt->sc_pd,
-                      IB_ACCESS_LOCAL_WRITE |
-                      IB_ACCESS_REMOTE_WRITE);
-    if (IS_ERR(newxprt->sc_phys_mr)) {
-        dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret);
-        goto errout;
-    }
+    /*
+     * Use the most secure set of MR resources based on the
+     * transport type and available memory management features in
+     * the device. Here's the table implemented below:
+     *
+     *          Fast    Global  DMA     Remote WR
+     *          Reg     LKEY    MR      Access
+     *          Sup'd   Sup'd   Needed  Needed
+     *
+     * IWARP    N       N       Y       Y
+     *          N       Y       Y       Y
+     *          Y       N       Y       N
+     *          Y       Y       N       -
+     *
+     * IB       N       N       Y       N
+     *          N       Y       N       -
+     *          Y       N       Y       N
+     *          Y       Y       N       -
+     *
+     * NB:  iWARP requires remote write access for the data sink
+     *      of an RDMA_READ. IB does not.
+     */
+    if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+        newxprt->sc_frmr_pg_list_len =
+            devattr.max_fast_reg_page_list_len;
+        newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
+    }
+
+    /*
+     * Determine if a DMA MR is required and if so, what privs are required
+     */
+    switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) {
+    case RDMA_TRANSPORT_IWARP:
+        newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
+        if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
+            need_dma_mr = 1;
+            dma_mr_acc =
+                (IB_ACCESS_LOCAL_WRITE |
+                 IB_ACCESS_REMOTE_WRITE);
+        } else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+            need_dma_mr = 1;
+            dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+        } else
+            need_dma_mr = 0;
+        break;
+    case RDMA_TRANSPORT_IB:
+        if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+            need_dma_mr = 1;
+            dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+        } else
+            need_dma_mr = 0;
+        break;
+    default:
+        goto errout;
+    }
+
+    /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
+    if (need_dma_mr) {
+        /* Register all of physical memory */
+        newxprt->sc_phys_mr =
+            ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
+        if (IS_ERR(newxprt->sc_phys_mr)) {
+            dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
+                    ret);
+            goto errout;
+        }
+        newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
+    } else
+        newxprt->sc_dma_lkey =
+            newxprt->sc_cm_id->device->local_dma_lkey;

     /* Post receive buffers */
     for (i = 0; i < newxprt->sc_max_requests; i++) {
         ret = svc_rdma_post_recv(newxprt);
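The table in the hunk above is the heart of the new capability negotiation. As an illustrative restatement (not in the patch), the need for a DMA MR reduces to the predicate below, where is_iwarp, fastreg and global_lkey stand for the transport type and the two device capability bits tested in the switch statement:

static inline int example_need_dma_mr(int is_iwarp, int fastreg, int global_lkey)
{
    if (is_iwarp && !fastreg)
        return 1;               /* and the MR needs REMOTE_WRITE access too */
    return !global_lkey;        /* otherwise only when no local DMA lkey */
}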
@@ -961,6 +1148,9 @@ static void __svc_rdma_free(struct work_struct *work)
     WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
     WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);

+    /* De-allocate fastreg mr */
+    rdma_dealloc_frmr_q(rdma);
+
     /* Destroy the QP if present (not a listener) */
     if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
         ib_destroy_qp(rdma->sc_qp);
@@ -1014,21 +1204,59 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
     return 1;
 }

+/*
+ * Attempt to register the kvec representing the RPC memory with the
+ * device.
+ *
+ * Returns:
+ *  NULL : The device does not support fastreg or there were no more
+ *         fastreg mr.
+ *  frmr : The kvec register request was successfully posted.
+ *    <0 : An error was encountered attempting to register the kvec.
+ */
+int svc_rdma_fastreg(struct svcxprt_rdma *xprt,
+                     struct svc_rdma_fastreg_mr *frmr)
+{
+    struct ib_send_wr fastreg_wr;
+    u8 key;
+
+    /* Bump the key */
+    key = (u8)(frmr->mr->lkey & 0x000000FF);
+    ib_update_fast_reg_key(frmr->mr, ++key);
+
+    /* Prepare FASTREG WR */
+    memset(&fastreg_wr, 0, sizeof fastreg_wr);
+    fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+    fastreg_wr.send_flags = IB_SEND_SIGNALED;
+    fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
+    fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
+    fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
+    fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+    fastreg_wr.wr.fast_reg.length = frmr->map_len;
+    fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
+    fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
+    return svc_rdma_send(xprt, &fastreg_wr);
+}
+
 int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 {
-    struct ib_send_wr *bad_wr;
+    struct ib_send_wr *bad_wr, *n_wr;
+    int wr_count;
+    int i;
     int ret;

     if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
         return -ENOTCONN;

     BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
-    BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
-           wr->opcode);
+    wr_count = 1;
+    for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
+        wr_count++;
+
     /* If the SQ is full, wait until an SQ entry is available */
     while (1) {
         spin_lock_bh(&xprt->sc_lock);
-        if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
+        if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) {
             spin_unlock_bh(&xprt->sc_lock);
             atomic_inc(&rdma_stat_sq_starve);
@@ -1043,19 +1271,26 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
                 return 0;
             continue;
         }
-        /* Bumped used SQ WR count and post */
-        svc_xprt_get(&xprt->sc_xprt);
+        /* Take a transport ref for each WR posted */
+        for (i = 0; i < wr_count; i++)
+            svc_xprt_get(&xprt->sc_xprt);
+
+        /* Bump used SQ WR count and post */
+        atomic_add(wr_count, &xprt->sc_sq_count);
         ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
-        if (!ret)
-            atomic_inc(&xprt->sc_sq_count);
-        else {
-            svc_xprt_put(&xprt->sc_xprt);
+        if (ret) {
+            set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+            atomic_sub(wr_count, &xprt->sc_sq_count);
+            for (i = 0; i < wr_count; i++)
+                svc_xprt_put(&xprt->sc_xprt);
             dprintk("svcrdma: failed to post SQ WR rc=%d, "
                     "sc_sq_count=%d, sc_sq_depth=%d\n",
                     ret, atomic_read(&xprt->sc_sq_count),
                     xprt->sc_sq_depth);
         }
         spin_unlock_bh(&xprt->sc_lock);
+        if (ret)
+            wake_up(&xprt->sc_send_wait);
         break;
     }
     return ret;
@@ -1079,10 +1314,14 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
     length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);

     /* Prepare SGE for local address */
-    atomic_inc(&xprt->sc_dma_used);
     sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
                                p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
-    sge.lkey = xprt->sc_phys_mr->lkey;
+    if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
+        put_page(p);
+        return;
+    }
+    atomic_inc(&xprt->sc_dma_used);
+    sge.lkey = xprt->sc_dma_lkey;
     sge.length = length;

     ctxt = svc_rdma_get_context(xprt);
@@ -1103,6 +1342,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
     if (ret) {
         dprintk("svcrdma: Error %d posting send for protocol error\n",
                 ret);
+        ib_dma_unmap_page(xprt->sc_cm_id->device,
+                          sge.addr, PAGE_SIZE,
+                          DMA_FROM_DEVICE);
         svc_rdma_put_context(ctxt, 1);
     }
 }