Commit c356dc4b authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

 1) Fix leak of unqueued fragments in ipv6 nf_defrag, from Guillaume
    Nault.

 2) Don't access the DDM interface unless the transceiver implements it
    in bnx2x, from Mauro S. M. Rodrigues.

 3) Don't double fetch 'len' from userspace in sock_getsockopt(), from
    JingYi Hou.

 4) Sign extension overflow in lio_core, from Colin Ian King.

 5) Various netem bug fixes with respect to corrupted packets, from Jakub Kicinski.

 6) Fix epollout hang in hvsock, from Sunil Muthuswamy.

 7) Fix regression in default fib6_type, from David Ahern.

 8) Handle memory limits in tcp_fragment more appropriately, from Eric
    Dumazet.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (24 commits)
  tcp: refine memory limit test in tcp_fragment()
  inet: clear num_timeout reqsk_alloc()
  net: mvpp2: debugfs: Add pmap to fs dump
  ipv6: Default fib6_type to RTN_UNICAST when not set
  net: hns3: Fix inconsistent indenting
  net/af_iucv: always register net_device notifier
  net/af_iucv: build proper skbs for HiperTransport
  net/af_iucv: remove GFP_DMA restriction for HiperTransport
  net: dsa: mv88e6xxx: fix shift of FID bits in mv88e6185_g1_vtu_loadpurge()
  hvsock: fix epollout hang from race condition
  net/udp_gso: Allow TX timestamp with UDP GSO
  net: netem: fix use after free and double free with packet corruption
  net: netem: fix backlog accounting for corrupted GSO frames
  net: lio_core: fix potential sign-extension overflow on large shift
  tipc: pass tunnel dev as NULL to udp_tunnel(6)_xmit_skb
  ip6_tunnel: allow not to count pkts on tstats by passing dev as NULL
  ip_tunnel: allow not to count pkts on tstats by setting skb's dev to NULL
  tun: wake up waitqueues after IFF_UP is set
  net: remove duplicate fetch in sock_getsockopt
  tipc: fix issues with early FAILOVER_MSG from peer
  ...
parents 121bddf3 b6653b36
......@@ -415,7 +415,7 @@ int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip,
* VTU DBNum[7:4] are located in VTU Operation 11:8
*/
op |= entry->fid & 0x000f;
op |= (entry->fid & 0x00f0) << 8;
op |= (entry->fid & 0x00f0) << 4;
}
return mv88e6xxx_g1_vtu_op(chip, op);
......
......@@ -1609,7 +1609,8 @@ static int bnx2x_get_module_info(struct net_device *dev,
}
if (!sff8472_comp ||
(diag_type & SFP_EEPROM_DIAG_ADDR_CHANGE_REQ)) {
(diag_type & SFP_EEPROM_DIAG_ADDR_CHANGE_REQ) ||
!(diag_type & SFP_EEPROM_DDM_IMPLEMENTED)) {
modinfo->type = ETH_MODULE_SFF_8079;
modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
} else {
......
......@@ -62,6 +62,7 @@
#define SFP_EEPROM_DIAG_TYPE_ADDR 0x5c
#define SFP_EEPROM_DIAG_TYPE_SIZE 1
#define SFP_EEPROM_DIAG_ADDR_CHANGE_REQ (1<<2)
#define SFP_EEPROM_DDM_IMPLEMENTED (1<<6)
#define SFP_EEPROM_SFF_8472_COMP_ADDR 0x5e
#define SFP_EEPROM_SFF_8472_COMP_SIZE 1
......
......@@ -964,7 +964,7 @@ static void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct)
if (droq->ops.poll_mode) {
droq->ops.napi_fn(droq);
oct_priv->napi_mask |= (1 << oq_no);
oct_priv->napi_mask |= BIT_ULL(oq_no);
} else {
tasklet_schedule(&oct_priv->droq_tasklet);
}
......
......@@ -3803,7 +3803,7 @@ static int hns3_client_init(struct hnae3_handle *handle)
ret = hns3_client_start(handle);
if (ret) {
dev_err(priv->dev, "hns3_client_start fail! ret=%d\n", ret);
goto out_client_start;
goto out_client_start;
}
hns3_dcbnl_setup(handle);
......
......@@ -566,6 +566,9 @@ static int mvpp2_dbgfs_prs_entry_init(struct dentry *parent,
debugfs_create_file("hits", 0444, prs_entry_dir, entry,
&mvpp2_dbgfs_prs_hits_fops);
debugfs_create_file("pmap", 0444, prs_entry_dir, entry,
&mvpp2_dbgfs_prs_pmap_fops);
return 0;
}
......
......@@ -1014,18 +1014,8 @@ static void tun_net_uninit(struct net_device *dev)
/* Net device open. */
static int tun_net_open(struct net_device *dev)
{
struct tun_struct *tun = netdev_priv(dev);
int i;
netif_tx_start_all_queues(dev);
for (i = 0; i < tun->numqueues; i++) {
struct tun_file *tfile;
tfile = rtnl_dereference(tun->tfiles[i]);
tfile->socket.sk->sk_write_space(tfile->socket.sk);
}
return 0;
}
......@@ -3634,6 +3624,7 @@ static int tun_device_event(struct notifier_block *unused,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct tun_struct *tun = netdev_priv(dev);
int i;
if (dev->rtnl_link_ops != &tun_link_ops)
return NOTIFY_DONE;
......@@ -3643,6 +3634,14 @@ static int tun_device_event(struct notifier_block *unused,
if (tun_queue_resize(tun))
return NOTIFY_BAD;
break;
case NETDEV_UP:
for (i = 0; i < tun->numqueues; i++) {
struct tun_file *tfile;
tfile = rtnl_dereference(tun->tfiles[i]);
tfile->socket.sk->sk_write_space(tfile->socket.sk);
}
break;
default:
break;
}
......
......@@ -158,9 +158,12 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
pkt_len = skb->len - skb_inner_network_offset(skb);
err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
if (unlikely(net_xmit_eval(err)))
pkt_len = -1;
iptunnel_xmit_stats(dev, pkt_len);
if (dev) {
if (unlikely(net_xmit_eval(err)))
pkt_len = -1;
iptunnel_xmit_stats(dev, pkt_len);
}
}
#endif
#endif
......@@ -97,6 +97,9 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
sk_node_init(&req_to_sk(req)->sk_node);
sk_tx_queue_clear(req_to_sk(req));
req->saved_syn = NULL;
req->num_timeout = 0;
req->num_retrans = 0;
req->sk = NULL;
refcount_set(&req->rsk_refcnt, 0);
return req;
......
......@@ -1477,9 +1477,6 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
{
u32 meminfo[SK_MEMINFO_VARS];
if (get_user(len, optlen))
return -EFAULT;
sk_get_meminfo(sk, meminfo);
len = min_t(unsigned int, len, sizeof(meminfo));
......
......@@ -752,10 +752,6 @@ static void reqsk_timer_handler(struct timer_list *t)
static void reqsk_queue_hash_req(struct request_sock *req,
unsigned long timeout)
{
req->num_retrans = 0;
req->num_timeout = 0;
req->sk = NULL;
timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
mod_timer(&req->rsk_timer, jiffies + timeout);
......
......@@ -76,9 +76,12 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);
err = ip_local_out(net, sk, skb);
if (unlikely(net_xmit_eval(err)))
pkt_len = 0;
iptunnel_xmit_stats(dev, pkt_len);
if (dev) {
if (unlikely(net_xmit_eval(err)))
pkt_len = 0;
iptunnel_xmit_stats(dev, pkt_len);
}
}
EXPORT_SYMBOL_GPL(iptunnel_xmit);
......
......@@ -221,10 +221,6 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
struct sock *child;
bool own_req;
req->num_retrans = 0;
req->num_timeout = 0;
req->sk = NULL;
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
NULL, &own_req);
if (!child)
......
......@@ -1296,7 +1296,8 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
if (nsize < 0)
nsize = 0;
if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf &&
tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
return -ENOMEM;
}
......
......@@ -224,6 +224,11 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
seg = segs;
uh = udp_hdr(seg);
/* preserve TX timestamp flags and TS key for first segment */
skb_shinfo(seg)->tskey = skb_shinfo(gso_skb)->tskey;
skb_shinfo(seg)->tx_flags |=
(skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP);
/* compute checksum adjustment based on old length versus new */
newlen = htons(sizeof(*uh) + mss);
check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
......
......@@ -261,8 +261,14 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
prev = fq->q.fragments_tail;
err = inet_frag_queue_insert(&fq->q, skb, offset, end);
if (err)
if (err) {
if (err == IPFRAG_DUP) {
/* No error for duplicates, pretend they got queued. */
kfree_skb(skb);
return -EINPROGRESS;
}
goto insert_error;
}
if (dev)
fq->iif = dev->ifindex;
......@@ -289,15 +295,17 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
skb->_skb_refdst = 0UL;
err = nf_ct_frag6_reasm(fq, skb, prev, dev);
skb->_skb_refdst = orefdst;
return err;
/* After queue has assumed skb ownership, only 0 or
* -EINPROGRESS must be returned.
*/
return err ? -EINPROGRESS : 0;
}
skb_dst_drop(skb);
return -EINPROGRESS;
insert_error:
if (err == IPFRAG_DUP)
goto err;
inet_frag_kill(&fq->q);
err:
skb_dst_drop(skb);
......@@ -476,12 +484,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
ret = 0;
}
/* after queue has assumed skb ownership, only 0 or -EINPROGRESS
* must be returned.
*/
if (ret)
ret = -EINPROGRESS;
spin_unlock_bh(&fq->q.lock);
inet_frag_put(&fq->q);
return ret;
......
......@@ -3184,7 +3184,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
rt->fib6_table = table;
rt->fib6_metric = cfg->fc_metric;
rt->fib6_type = cfg->fc_type;
rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
......
......@@ -14,6 +14,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/errno.h>
......@@ -347,14 +348,14 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
if (imsg)
memcpy(&phs_hdr->iucv_hdr, imsg, sizeof(struct iucv_message));
skb_push(skb, ETH_HLEN);
memset(skb->data, 0, ETH_HLEN);
skb->dev = iucv->hs_dev;
if (!skb->dev) {
err = -ENODEV;
goto err_free;
}
dev_hard_header(skb, skb->dev, ETH_P_AF_IUCV, NULL, NULL, skb->len);
if (!(skb->dev->flags & IFF_UP) || !netif_carrier_ok(skb->dev)) {
err = -ENETDOWN;
goto err_free;
......@@ -367,6 +368,8 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
skb_trim(skb, skb->dev->mtu);
}
skb->protocol = cpu_to_be16(ETH_P_AF_IUCV);
__skb_header_release(skb);
nskb = skb_clone(skb, GFP_ATOMIC);
if (!nskb) {
err = -ENOMEM;
......@@ -466,12 +469,14 @@ static void iucv_sever_path(struct sock *sk, int with_user_data)
/* Send controlling flags through an IUCV socket for HIPER transport */
static int iucv_send_ctrl(struct sock *sk, u8 flags)
{
struct iucv_sock *iucv = iucv_sk(sk);
int err = 0;
int blen;
struct sk_buff *skb;
u8 shutdown = 0;
blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
blen = sizeof(struct af_iucv_trans_hdr) +
LL_RESERVED_SPACE(iucv->hs_dev);
if (sk->sk_shutdown & SEND_SHUTDOWN) {
/* controlling flags should be sent anyway */
shutdown = sk->sk_shutdown;
......@@ -588,7 +593,6 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio,
sk->sk_destruct = iucv_sock_destruct;
sk->sk_sndtimeo = IUCV_CONN_TIMEOUT;
sk->sk_allocation = GFP_DMA;
sock_reset_flag(sk, SOCK_ZAPPED);
......@@ -782,6 +786,7 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
memcpy(iucv->src_user_id, iucv_userid, 8);
sk->sk_state = IUCV_BOUND;
iucv->transport = AF_IUCV_TRANS_IUCV;
sk->sk_allocation |= GFP_DMA;
if (!iucv->msglimit)
iucv->msglimit = IUCV_QUEUELEN_DEFAULT;
goto done_unlock;
......@@ -806,6 +811,8 @@ static int iucv_sock_autobind(struct sock *sk)
return -EPROTO;
memcpy(iucv->src_user_id, iucv_userid, 8);
iucv->transport = AF_IUCV_TRANS_IUCV;
sk->sk_allocation |= GFP_DMA;
write_lock_bh(&iucv_sk_list.lock);
__iucv_auto_name(iucv);
......@@ -1131,7 +1138,8 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
* segmented records using the MSG_EOR flag), but
* for SOCK_STREAM we might want to improve it in future */
if (iucv->transport == AF_IUCV_TRANS_HIPER) {
headroom = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
headroom = sizeof(struct af_iucv_trans_hdr) +
LL_RESERVED_SPACE(iucv->hs_dev);
linear = len;
} else {
if (len < PAGE_SIZE) {
......@@ -1781,6 +1789,8 @@ static int iucv_callback_connreq(struct iucv_path *path,
niucv = iucv_sk(nsk);
iucv_sock_init(nsk, sk);
niucv->transport = AF_IUCV_TRANS_IUCV;
nsk->sk_allocation |= GFP_DMA;
/* Set the new iucv_sock */
memcpy(niucv->dst_name, ipuser + 8, 8);
......@@ -2430,6 +2440,13 @@ static int afiucv_iucv_init(void)
return err;
}
/*
 * Tear down the IUCV-transport registrations: unregister the af_iucv
 * device, then the af_iucv driver, then the af_iucv handler via the
 * pr_iucv interface.
 *
 * pr_iucv is dereferenced without a NULL check, so it must be non-NULL
 * on entry; the callers visible in this file (the afiucv_init() error
 * path and afiucv_exit()) both test pr_iucv before calling.
 *
 * NOTE(review): presumably the counterpart of afiucv_iucv_init() --
 * confirm the teardown order mirrors the registration order there.
 */
static void afiucv_iucv_exit(void)
{
device_unregister(af_iucv_dev);
driver_unregister(&af_iucv_driver);
pr_iucv->iucv_unregister(&af_iucv_handler, 0);
}
static int __init afiucv_init(void)
{
int err;
......@@ -2463,11 +2480,18 @@ static int __init afiucv_init(void)
err = afiucv_iucv_init();
if (err)
goto out_sock;
} else
register_netdevice_notifier(&afiucv_netdev_notifier);
}
err = register_netdevice_notifier(&afiucv_netdev_notifier);
if (err)
goto out_notifier;
dev_add_pack(&iucv_packet_type);
return 0;
out_notifier:
if (pr_iucv)
afiucv_iucv_exit();
out_sock:
sock_unregister(PF_IUCV);
out_proto:
......@@ -2481,12 +2505,11 @@ static int __init afiucv_init(void)
static void __exit afiucv_exit(void)
{
if (pr_iucv) {
device_unregister(af_iucv_dev);
driver_unregister(&af_iucv_driver);
pr_iucv->iucv_unregister(&af_iucv_handler, 0);
afiucv_iucv_exit();
symbol_put(iucv_if);
} else
unregister_netdevice_notifier(&afiucv_netdev_notifier);
}
unregister_netdevice_notifier(&afiucv_netdev_notifier);
dev_remove_pack(&iucv_packet_type);
sock_unregister(PF_IUCV);
proto_unregister(&iucv_proto);
......
......@@ -304,5 +304,4 @@ module_exit(nft_masq_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq");
MODULE_ALIAS_NFT_EXPR("masq");
......@@ -291,5 +291,4 @@ module_exit(nft_redir_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir");
MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir");
MODULE_ALIAS_NFT_EXPR("nat");
......@@ -439,8 +439,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct netem_skb_cb *cb;
struct sk_buff *skb2;
struct sk_buff *segs = NULL;
unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
int nb = 0;
unsigned int prev_len = qdisc_pkt_len(skb);
int count = 1;
int rc = NET_XMIT_SUCCESS;
int rc_drop = NET_XMIT_DROP;
......@@ -494,16 +493,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
*/
if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
if (skb_is_gso(skb)) {
segs = netem_segment(skb, sch, to_free);
if (!segs)
skb = netem_segment(skb, sch, to_free);
if (!skb)
return rc_drop;
} else {
segs = skb;
segs = skb->next;
skb_mark_not_on_list(skb);
qdisc_skb_cb(skb)->pkt_len = skb->len;
}
skb = segs;
segs = segs->next;
skb = skb_unshare(skb, GFP_ATOMIC);
if (unlikely(!skb)) {
qdisc_qstats_drop(sch);
......@@ -520,6 +517,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
if (unlikely(sch->q.qlen >= sch->limit)) {
/* re-link segs, so that qdisc_drop_all() frees them all */
skb->next = segs;
qdisc_drop_all(skb, sch, to_free);
return rc_drop;
}
......@@ -593,6 +592,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
finish_segs:
if (segs) {
unsigned int len, last_len;
int nb = 0;
len = skb->len;
while (segs) {
skb2 = segs->next;
skb_mark_not_on_list(segs);
......@@ -608,9 +612,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
segs = skb2;
}
sch->q.qlen += nb;
if (nb > 1)
qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
qdisc_tree_reduce_backlog(sch, -nb, prev_len - len);
}
return NET_XMIT_SUCCESS;
}
......
......@@ -1728,7 +1728,6 @@ void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl,
* node has entered SELF_DOWN_PEER_LEAVING and both peer nodes
* would have to start over from scratch instead.
*/
WARN_ON(l && tipc_link_is_up(l));
tnl->drop_point = 1;
tnl->failover_reasm_skb = NULL;
......
......@@ -766,9 +766,9 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id,
* disturbance, wrong session, etc.)
* 3. Link <1B-2B> up
* 4. Link endpoint 2A down (e.g. due to link tolerance timeout)
* 5. Node B starts failover onto link <1B-2B>
* 5. Node 2 starts failover onto link <1B-2B>
*
* ==> Node A does never start link/node failover!
* ==> Node 1 does never start link/node failover!
*
* @n: tipc node structure
* @l: link peer endpoint failingover (- can be NULL)
......@@ -783,6 +783,10 @@ static void tipc_node_link_failover(struct tipc_node *n, struct tipc_link *l,
if (!tipc_link_is_up(tnl))
return;
/* Don't rush, failure link may be in the process of resetting */
if (l && !tipc_link_is_reset(l))
return;
tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT);
tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
......@@ -1706,7 +1710,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
/* Initiate or update failover mode if applicable */
if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) {
syncpt = oseqno + exp_pkts - 1;
if (pl && tipc_link_is_up(pl)) {
if (pl && !tipc_link_is_reset(pl)) {
__tipc_node_link_down(n, &pb_id, xmitq, &maddr);
trace_tipc_node_link_down(n, true,
"node link down <- failover!");
......
......@@ -176,7 +176,6 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
goto tx_error;
}
skb->dev = rt->dst.dev;
ttl = ip4_dst_hoplimit(&rt->dst);
udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr,
dst->ipv4.s_addr, 0, ttl, 0, src->port,
......@@ -195,10 +194,9 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
if (err)
goto tx_error;
ttl = ip6_dst_hoplimit(ndst);
err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb,
ndst->dev, &src->ipv6,
&dst->ipv6, 0, ttl, 0, src->port,
dst->port, false);
err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL,
&src->ipv6, &dst->ipv6, 0, ttl, 0,
src->port, dst->port, false);
#endif
}
return err;
......
......@@ -211,18 +211,6 @@ static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan)
set_channel_pending_send_size(chan,
HVS_PKT_LEN(HVS_SEND_BUF_SIZE));
/* See hvs_stream_has_space(): we must make sure the host has seen
* the new pending send size, before we can re-check the writable
* bytes.
*/
virt_mb();
}
static void hvs_clear_channel_pending_send_size(struct vmbus_channel *chan)
{
set_channel_pending_send_size(chan, 0);
/* Ditto */
virt_mb();
}
......@@ -292,9 +280,6 @@ static void hvs_channel_cb(void *ctx)
if (hvs_channel_readable(chan))
sk->sk_data_ready(sk);
/* See hvs_stream_has_space(): when we reach here, the writable bytes
* may be already less than HVS_PKT_LEN(HVS_SEND_BUF_SIZE).
*/
if (hv_get_bytes_to_write(&chan->outbound) > 0)
sk->sk_write_space(sk);
}
......@@ -395,6 +380,13 @@ static void hvs_open_connection(struct vmbus_channel *chan)
set_per_channel_state(chan, conn_from_host ? new : sk);
vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
/* Set the pending send size to max packet size to always get
* notifications from the host when there is enough writable space.
* The host is optimized to send notifications only when the pending
* size boundary is crossed, and not always.
*/
hvs_set_channel_pending_send_size(chan);
if (conn_from_host) {
new->sk_state = TCP_ESTABLISHED;
sk->sk_ack_backlog++;
......@@ -688,23 +680,8 @@ static s64 hvs_stream_has_data(struct vsock_sock *vsk)
static s64 hvs_stream_has_space(struct vsock_sock *vsk)
{
struct hvsock *hvs = vsk->trans;
struct vmbus_channel *chan = hvs->chan;
s64 ret;
ret = hvs_channel_writable_bytes(chan);
if (ret > 0) {
hvs_clear_channel_pending_send_size(chan);
} else {
/* See hvs_channel_cb() */
hvs_set_channel_pending_send_size(chan);
/* Re-check the writable bytes to avoid race */
ret = hvs_channel_writable_bytes(chan);
if (ret > 0)
hvs_clear_channel_pending_send_size(chan);
}
return ret;
return hvs_channel_writable_bytes(hvs->chan);
}
static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment