Commit d655a9fb authored by Wengang Wang's avatar Wengang Wang Committed by Doug Ledford

rds: re-entry of rds_ib_xmit/rds_iw_xmit

The BUG_ON at line 452/453 is triggered in function rds_send_xmit.

 441                         while (ret) {
 442                                 tmp = min_t(int, ret, sg->length -
 443                                                       conn->c_xmit_data_off);
 444                                 conn->c_xmit_data_off += tmp;
 445                                 ret -= tmp;
 446                                 if (conn->c_xmit_data_off == sg->length) {
 447                                         conn->c_xmit_data_off = 0;
 448                                         sg++;
 449                                         conn->c_xmit_sg++;
 450                                         if (ret != 0 && conn->c_xmit_sg == rm->data.op_nents)
 451                                                 printk(KERN_ERR "conn %p rm %p sg %p ret %d\n", conn, rm, sg, ret);
 452                                         BUG_ON(ret != 0 &&
 453                                                conn->c_xmit_sg == rm->data.op_nents);
 454                                 }
 455                         }

it is complaining the total sent length is bigger that we want to send.

rds_ib_xmit() is wrong for the second entry for the same rds_message returning
wrong value.

the sg and off passed by rds_send_xmit to rds_ib_xmit is based on
scatterlist.offset/length, but the rds_ib_xmit action is based on
scatterlist.dma_address/dma_length. in case dma_length is larger than length
there is problem. for the 2nd and later entries of rds_ib_xmit for same
rds_message, at least one of the following two is wrong:

1) the scatterlist to start with,  the choosen one can far beyond the correct
   one.
2) the offset to start with within the scatterlist.

fix:
add op_dmasg and op_dmaoff to rm_data_op structure indicating the scatterlist
and offset within the it to start with for rds_ib_xmit respectively. op_dmasg
and op_dmaoff are initialized to zero when doing dma mapping for the first see
of the message and are changed when filling send slots.

the same applies to rds_iw_xmit too.
Signed-off-by: default avatarWengang Wang <wen.gang.wang@oracle.com>
Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
parent 52374967
...@@ -605,6 +605,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, ...@@ -605,6 +605,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
} }
rds_message_addref(rm); rds_message_addref(rm);
rm->data.op_dmasg = 0;
rm->data.op_dmaoff = 0;
ic->i_data_op = &rm->data; ic->i_data_op = &rm->data;
/* Finalize the header */ /* Finalize the header */
...@@ -658,7 +660,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, ...@@ -658,7 +660,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
send = &ic->i_sends[pos]; send = &ic->i_sends[pos];
first = send; first = send;
prev = NULL; prev = NULL;
scat = &ic->i_data_op->op_sg[sg]; scat = &ic->i_data_op->op_sg[rm->data.op_dmasg];
i = 0; i = 0;
do { do {
unsigned int len = 0; unsigned int len = 0;
...@@ -680,17 +682,20 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, ...@@ -680,17 +682,20 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
/* Set up the data, if present */ /* Set up the data, if present */
if (i < work_alloc if (i < work_alloc
&& scat != &rm->data.op_sg[rm->data.op_count]) { && scat != &rm->data.op_sg[rm->data.op_count]) {
len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); len = min(RDS_FRAG_SIZE,
ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
send->s_wr.num_sge = 2; send->s_wr.num_sge = 2;
send->s_sge[1].addr = ib_sg_dma_address(dev, scat) + off; send->s_sge[1].addr = ib_sg_dma_address(dev, scat);
send->s_sge[1].addr += rm->data.op_dmaoff;
send->s_sge[1].length = len; send->s_sge[1].length = len;
bytes_sent += len; bytes_sent += len;
off += len; rm->data.op_dmaoff += len;
if (off == ib_sg_dma_len(dev, scat)) { if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
scat++; scat++;
off = 0; rm->data.op_dmasg++;
rm->data.op_dmaoff = 0;
} }
} }
......
...@@ -581,6 +581,8 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, ...@@ -581,6 +581,8 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes; ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
rds_message_addref(rm); rds_message_addref(rm);
rm->data.op_dmasg = 0;
rm->data.op_dmaoff = 0;
ic->i_rm = rm; ic->i_rm = rm;
/* Finalize the header */ /* Finalize the header */
...@@ -622,7 +624,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, ...@@ -622,7 +624,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
send = &ic->i_sends[pos]; send = &ic->i_sends[pos];
first = send; first = send;
prev = NULL; prev = NULL;
scat = &rm->data.op_sg[sg]; scat = &rm->data.op_sg[rm->data.op_dmasg];
sent = 0; sent = 0;
i = 0; i = 0;
...@@ -656,10 +658,11 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, ...@@ -656,10 +658,11 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
send = &ic->i_sends[pos]; send = &ic->i_sends[pos];
len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); len = min(RDS_FRAG_SIZE,
ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
rds_iw_xmit_populate_wr(ic, send, pos, rds_iw_xmit_populate_wr(ic, send, pos,
ib_sg_dma_address(dev, scat) + off, len, ib_sg_dma_address(dev, scat) + rm->data.op_dmaoff, len,
send_flags); send_flags);
/* /*
* We want to delay signaling completions just enough to get * We want to delay signaling completions just enough to get
...@@ -687,10 +690,11 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, ...@@ -687,10 +690,11 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
&send->s_wr, send->s_wr.num_sge, send->s_wr.next); &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
sent += len; sent += len;
off += len; rm->data.op_dmaoff += len;
if (off == ib_sg_dma_len(dev, scat)) { if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
scat++; scat++;
off = 0; rm->data.op_dmaoff = 0;
rm->data.op_dmasg++;
} }
add_header: add_header:
......
...@@ -363,6 +363,8 @@ struct rds_message { ...@@ -363,6 +363,8 @@ struct rds_message {
unsigned int op_active:1; unsigned int op_active:1;
unsigned int op_nents; unsigned int op_nents;
unsigned int op_count; unsigned int op_count;
unsigned int op_dmasg;
unsigned int op_dmaoff;
struct scatterlist *op_sg; struct scatterlist *op_sg;
} data; } data;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment