Commit 833ef3b9 authored by Jeremy Kerr, committed by David S. Miller

mctp: Populate socket implementation

Start filling out the socket syscalls: bind, sendmsg & recvmsg.

This requires an input route implementation, so we extend
mctp_route_input to allow lookups on binds & message tags. This only
handles single-packet messages at present; we will add fragmentation in
a future change.
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 831119f8
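
As a usage illustration only (not part of this commit), a minimal requester exercising the new sendmsg/recvmsg paths could look like the sketch below. The network ID, destination EID and message type are placeholder values; with this commit, sending requires CAP_NET_RAW, and setting MCTP_TAG_OWNER asks the kernel to allocate the tag that the reply is later matched against.

```c
/* Hypothetical userspace requester; placeholder network/EID/type values. */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/mctp.h>

int main(void)
{
	struct sockaddr_mctp addr = { 0 };
	socklen_t addrlen = sizeof(addr);
	char req[] = { 0x01 };	/* message body; the type byte is prepended by the kernel */
	char rsp[64];
	ssize_t len;
	int sd;

	sd = socket(AF_MCTP, SOCK_DGRAM, 0);
	if (sd < 0)
		return 1;

	addr.smctp_family = AF_MCTP;
	addr.smctp_network = 1;			/* placeholder MCTP network ID */
	addr.smctp_addr.s_addr = 8;		/* placeholder destination EID */
	addr.smctp_type = 1;			/* placeholder message type */
	addr.smctp_tag = MCTP_TAG_OWNER;	/* kernel allocates a local tag */

	/* mctp_sendmsg(): needs CAP_NET_RAW; routes via mctp_route_lookup() */
	if (sendto(sd, req, sizeof(req), 0,
		   (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	/* mctp_recvmsg(): the reply reaches us via the (src, dest, tag) key */
	len = recvfrom(sd, rsp, sizeof(rsp), 0,
		       (struct sockaddr *)&addr, &addrlen);
	if (len < 0)
		return 1;

	printf("reply: %zd bytes from EID %d, type %d\n",
	       len, addr.smctp_addr.s_addr, addr.smctp_type);

	close(sd);
	return 0;
}
```
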
include/net/mctp.h
@@ -12,6 +12,7 @@
#include <linux/bits.h>
#include <linux/mctp.h>
#include <net/net_namespace.h>
#include <net/sock.h>
/* MCTP packet definitions */
struct mctp_hdr {
@@ -46,6 +47,64 @@ static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb)
return (struct mctp_hdr *)skb_network_header(skb);
}
/* socket implementation */
struct mctp_sock {
struct sock sk;
/* bind() params */
int bind_net;
mctp_eid_t bind_addr;
__u8 bind_type;
/* list of mctp_sk_key, for incoming tag lookup. updates protected
* by sk->net->keys_lock
*/
struct hlist_head keys;
};
/* Key for matching incoming packets to sockets or reassembly contexts.
* Packets are matched on (src,dest,tag).
*
* Lifetime requirements:
*
* - keys are free()ed via RCU
*
* - a mctp_sk_key contains a reference to a struct sock; this is valid
* for the life of the key. On sock destruction (through unhash), the key is
* removed from lists (see below), and will not be observable after a RCU
* grace period.
*
* any RX occurring within that grace period may still queue to the socket,
* but will hit the SOCK_DEAD case before the socket is freed.
*
* - these mctp_sk_keys appear on two lists:
* 1) the struct mctp_sock->keys list
* 2) the struct netns_mctp->keys list
*
* updates to either list are performed under the netns_mctp->keys
* lock.
*
* - there is a single destruction path for a mctp_sk_key - through socket
* unhash (see mctp_sk_unhash). This performs the list removal under
* keys_lock.
*/
struct mctp_sk_key {
mctp_eid_t peer_addr;
mctp_eid_t local_addr;
__u8 tag; /* incoming tag match; invert TO for local */
/* we hold a ref to sk when set */
struct sock *sk;
/* routing lookup list */
struct hlist_node hlist;
/* per-socket list */
struct hlist_node sklist;
struct rcu_head rcu;
};
struct mctp_skb_cb {
unsigned int magic;
unsigned int net;
......
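
The lifetime and locking rules documented above for struct mctp_sk_key are the usual RCU-protected hlist discipline: readers walk the list under rcu_read_lock(), writers unlink under a spinlock and defer the free past a grace period. A generic sketch of that pattern, using hypothetical demo_* names rather than code from this commit:

```c
/* Generic illustration of the RCU hlist discipline described above;
 * the demo_* names are hypothetical and not part of this commit.
 */
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_key {
	u8 tag;
	struct hlist_node hlist;
	struct rcu_head rcu;
};

static HLIST_HEAD(demo_keys);
static DEFINE_SPINLOCK(demo_keys_lock);

/* reader side: safe from packet rx, needs only rcu_read_lock() */
static bool demo_tag_in_use(u8 tag)
{
	struct demo_key *key;
	bool found = false;

	rcu_read_lock();
	hlist_for_each_entry_rcu(key, &demo_keys, hlist) {
		if (key->tag == tag) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();
	return found;
}

/* writer side: unlink under the lock, free only after a grace period */
static void demo_remove_tag(u8 tag)
{
	struct demo_key *key;
	struct hlist_node *tmp;
	unsigned long flags;

	spin_lock_irqsave(&demo_keys_lock, flags);
	hlist_for_each_entry_safe(key, tmp, &demo_keys, hlist) {
		if (key->tag == tag) {
			hlist_del_rcu(&key->hlist);
			kfree_rcu(key, rcu);
		}
	}
	spin_unlock_irqrestore(&demo_keys_lock, flags);
}
```
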
include/net/netns/mctp.h
@@ -12,6 +12,19 @@ struct netns_mctp {
/* Only updated under RTNL, entries freed via RCU */
struct list_head routes;
/* Bound sockets: list of sockets bound by type.
* This list is updated from non-atomic contexts (under bind_lock),
* and read (under rcu) in packet rx
*/
struct mutex bind_lock;
struct hlist_head binds;
/* tag allocations. This list is read and updated from atomic contexts,
* but elements are free()ed after a RCU grace-period
*/
spinlock_t keys_lock;
struct hlist_head keys;
/* neighbour table */
struct mutex neigh_lock;
struct list_head neighbours;
......
net/mctp/af_mctp.c
@@ -18,10 +18,6 @@
-/* socket implementation */
-struct mctp_sock {
-	struct sock sk;
-};
static int mctp_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -36,18 +32,160 @@ static int mctp_release(struct socket *sock)
static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
{
-	return 0;
struct sock *sk = sock->sk;
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
struct sockaddr_mctp *smctp;
int rc;
if (addrlen < sizeof(*smctp))
return -EINVAL;
if (addr->sa_family != AF_MCTP)
return -EAFNOSUPPORT;
if (!capable(CAP_NET_BIND_SERVICE))
return -EACCES;
/* it's a valid sockaddr for MCTP, cast and do protocol checks */
smctp = (struct sockaddr_mctp *)addr;
lock_sock(sk);
/* TODO: allow rebind */
if (sk_hashed(sk)) {
rc = -EADDRINUSE;
goto out_release;
}
msk->bind_net = smctp->smctp_network;
msk->bind_addr = smctp->smctp_addr.s_addr;
msk->bind_type = smctp->smctp_type & 0x7f; /* ignore the IC bit */
rc = sk->sk_prot->hash(sk);
out_release:
release_sock(sk);
return rc;
}
static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
-	return 0;
DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr);
int rc, addrlen = msg->msg_namelen;
struct sock *sk = sock->sk;
struct mctp_skb_cb *cb;
struct mctp_route *rt;
struct sk_buff *skb;
if (addr) {
if (addrlen < sizeof(struct sockaddr_mctp))
return -EINVAL;
if (addr->smctp_family != AF_MCTP)
return -EINVAL;
if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER))
return -EINVAL;
} else {
/* TODO: connect()ed sockets */
return -EDESTADDRREQ;
}
if (!capable(CAP_NET_RAW))
return -EACCES;
rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
addr->smctp_addr.s_addr);
if (!rt)
return -EHOSTUNREACH;
skb = sock_alloc_send_skb(sk, hlen + 1 + len,
msg->msg_flags & MSG_DONTWAIT, &rc);
if (!skb)
return rc;
skb_reserve(skb, hlen);
/* set type as first byte in payload */
*(u8 *)skb_put(skb, 1) = addr->smctp_type;
rc = memcpy_from_msg((void *)skb_put(skb, len), msg, len);
if (rc < 0) {
kfree_skb(skb);
return rc;
}
/* set up cb */
cb = __mctp_cb(skb);
cb->net = addr->smctp_network;
rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr,
addr->smctp_tag);
return rc ? : len;
}
static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags)
{
-	return 0;
DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
struct sock *sk = sock->sk;
struct sk_buff *skb;
size_t msglen;
u8 type;
int rc;
if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
return -EOPNOTSUPP;
skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc);
if (!skb)
return rc;
if (!skb->len) {
rc = 0;
goto out_free;
}
/* extract message type, remove from data */
type = *((u8 *)skb->data);
msglen = skb->len - 1;
if (len < msglen)
msg->msg_flags |= MSG_TRUNC;
else
len = msglen;
rc = skb_copy_datagram_msg(skb, 1, msg, len);
if (rc < 0)
goto out_free;
sock_recv_ts_and_drops(msg, sk, skb);
if (addr) {
struct mctp_skb_cb *cb = mctp_cb(skb);
/* TODO: expand mctp_skb_cb for header fields? */
struct mctp_hdr *hdr = mctp_hdr(skb);
addr = msg->msg_name;
addr->smctp_family = AF_MCTP;
addr->smctp_network = cb->net;
addr->smctp_addr.s_addr = hdr->src;
addr->smctp_type = type;
addr->smctp_tag = hdr->flags_seq_tag &
(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
msg->msg_namelen = sizeof(*addr);
}
rc = len;
if (flags & MSG_TRUNC)
rc = msglen;
out_free:
skb_free_datagram(sk, skb);
return rc;
}
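
As a second usage illustration (again, not part of this commit), a responder bound by message type might look like the sketch below. The type value is a placeholder; bind() requires CAP_NET_BIND_SERVICE and the reply requires CAP_NET_RAW per the checks above. The reply clears MCTP_TAG_OWNER from the received tag so that mctp_sendmsg() reuses the requester's tag rather than allocating a new one.

```c
/* Hypothetical bound responder; the message type (1) is a placeholder. */
#include <sys/types.h>
#include <sys/socket.h>
#include <linux/mctp.h>

int main(void)
{
	struct sockaddr_mctp bind_addr = { 0 }, src = { 0 };
	socklen_t srclen = sizeof(src);
	char buf[64];
	ssize_t len;
	int sd;

	sd = socket(AF_MCTP, SOCK_DGRAM, 0);
	if (sd < 0)
		return 1;

	/* accept type-1 messages for any local EID on any network */
	bind_addr.smctp_family = AF_MCTP;
	bind_addr.smctp_network = MCTP_NET_ANY;
	bind_addr.smctp_addr.s_addr = MCTP_ADDR_ANY;
	bind_addr.smctp_type = 1;
	if (bind(sd, (struct sockaddr *)&bind_addr, sizeof(bind_addr)))
		return 1;

	len = recvfrom(sd, buf, sizeof(buf), 0,
		       (struct sockaddr *)&src, &srclen);
	if (len < 0)
		return 1;

	/* the request arrived with MCTP_TAG_OWNER set in src.smctp_tag;
	 * clear it so the reply reuses that tag instead of allocating one
	 */
	src.smctp_tag &= ~MCTP_TAG_OWNER;
	return sendto(sd, buf, len, 0,
		      (struct sockaddr *)&src, sizeof(src)) < 0;
}
```
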
static int mctp_setsockopt(struct socket *sock, int level, int optname,
@@ -83,16 +221,63 @@ static const struct proto_ops mctp_dgram_ops = {
.sendpage = sock_no_sendpage,
};
static int mctp_sk_init(struct sock *sk)
{
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
INIT_HLIST_HEAD(&msk->keys);
return 0;
}
static void mctp_sk_close(struct sock *sk, long timeout)
{
sk_common_release(sk);
}
static int mctp_sk_hash(struct sock *sk)
{
struct net *net = sock_net(sk);
mutex_lock(&net->mctp.bind_lock);
sk_add_node_rcu(sk, &net->mctp.binds);
mutex_unlock(&net->mctp.bind_lock);
return 0;
}
static void mctp_sk_unhash(struct sock *sk)
{
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
struct net *net = sock_net(sk);
struct mctp_sk_key *key;
struct hlist_node *tmp;
unsigned long flags;
/* remove from any type-based binds */
mutex_lock(&net->mctp.bind_lock);
sk_del_node_init_rcu(sk);
mutex_unlock(&net->mctp.bind_lock);
/* remove tag allocations */
spin_lock_irqsave(&net->mctp.keys_lock, flags);
hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
hlist_del_rcu(&key->sklist);
hlist_del_rcu(&key->hlist);
kfree_rcu(key, rcu);
}
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
synchronize_rcu();
}
static struct proto mctp_proto = {
.name = "MCTP",
.owner = THIS_MODULE,
.obj_size = sizeof(struct mctp_sock),
.init = mctp_sk_init,
.close = mctp_sk_close,
.hash = mctp_sk_hash,
.unhash = mctp_sk_unhash,
};
static int mctp_pf_create(struct net *net, struct socket *sock,
@@ -147,6 +332,10 @@ static __init int mctp_init(void)
{
int rc;
/* ensure our uapi tag definitions match the header format */
BUILD_BUG_ON(MCTP_TAG_OWNER != MCTP_HDR_FLAG_TO);
BUILD_BUG_ON(MCTP_TAG_MASK != MCTP_HDR_TAG_MASK);
pr_info("mctp: management component transport protocol core\n");
rc = sock_register(&mctp_pf);
......
net/mctp/route.c
@@ -30,10 +30,139 @@ static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
return 0;
}
static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
{
struct mctp_skb_cb *cb = mctp_cb(skb);
struct mctp_hdr *mh;
struct sock *sk;
u8 type;
WARN_ON(!rcu_read_lock_held());
/* TODO: look up in skb->cb? */
mh = mctp_hdr(skb);
if (!skb_headlen(skb))
return NULL;
type = (*(u8 *)skb->data) & 0x7f;
sk_for_each_rcu(sk, &net->mctp.binds) {
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
continue;
if (msk->bind_type != type)
continue;
if (msk->bind_addr != MCTP_ADDR_ANY &&
msk->bind_addr != mh->dest)
continue;
return msk;
}
return NULL;
}
static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
mctp_eid_t peer, u8 tag)
{
if (key->local_addr != local)
return false;
if (key->peer_addr != peer)
return false;
if (key->tag != tag)
return false;
return true;
}
static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
mctp_eid_t peer)
{
struct mctp_sk_key *key, *ret;
struct mctp_hdr *mh;
u8 tag;
WARN_ON(!rcu_read_lock_held());
mh = mctp_hdr(skb);
tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
ret = NULL;
hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) {
if (mctp_key_match(key, mh->dest, peer, tag)) {
ret = key;
break;
}
}
return ret;
}
static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
{
/* -> to local stack */
/* TODO: socket lookup, reassemble */
struct net *net = dev_net(skb->dev);
struct mctp_sk_key *key;
struct mctp_sock *msk;
struct mctp_hdr *mh;
msk = NULL;
/* we may be receiving a locally-routed packet; drop source sk
* accounting
*/
skb_orphan(skb);
/* ensure we have enough data for a header and a type */
if (skb->len < sizeof(struct mctp_hdr) + 1)
goto drop;
/* grab header, advance data ptr */
mh = mctp_hdr(skb);
skb_pull(skb, sizeof(struct mctp_hdr));
if (mh->ver != 1)
goto drop;
/* TODO: reassembly */
if ((mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM))
!= (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM))
goto drop;
rcu_read_lock();
/* 1. lookup socket matching (src,dest,tag) */
key = mctp_lookup_key(net, skb, mh->src);
/* 2. lookup socket matching (BCAST,dest,tag) */
if (!key)
key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY);
/* 3. SOM? -> lookup bound socket, conditionally (!EOM) create
* mapping for future (1)/(2).
*/
if (key)
msk = container_of(key->sk, struct mctp_sock, sk);
else if (!msk && (mh->flags_seq_tag & MCTP_HDR_FLAG_SOM))
msk = mctp_lookup_bind(net, skb);
if (!msk)
goto unlock_drop;
sock_queue_rcv_skb(&msk->sk, skb);
rcu_read_unlock();
return 0;
unlock_drop:
rcu_read_unlock();
drop:
kfree_skb(skb);
return 0;
}
@@ -91,6 +220,80 @@ static struct mctp_route *mctp_route_alloc(void)
return rt;
}
/* tag management */
static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
struct mctp_sock *msk)
{
struct netns_mctp *mns = &net->mctp;
lockdep_assert_held(&mns->keys_lock);
key->sk = &msk->sk;
/* we hold the net->keys_lock here, allowing updates to both
* the net and sk lists
*/
hlist_add_head_rcu(&key->hlist, &mns->keys);
hlist_add_head_rcu(&key->sklist, &msk->keys);
}
/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
* it for the socket msk
*/
static int mctp_alloc_local_tag(struct mctp_sock *msk,
mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
{
struct net *net = sock_net(&msk->sk);
struct netns_mctp *mns = &net->mctp;
struct mctp_sk_key *key, *tmp;
unsigned long flags;
int rc = -EAGAIN;
u8 tagbits;
/* be optimistic, alloc now */
key = kzalloc(sizeof(*key), GFP_KERNEL);
if (!key)
return -ENOMEM;
key->local_addr = saddr;
key->peer_addr = daddr;
/* 8 possible tag values */
tagbits = 0xff;
spin_lock_irqsave(&mns->keys_lock, flags);
/* Walk through the existing keys, looking for potential conflicting
* tags. If we find a conflict, clear that bit from tagbits
*/
hlist_for_each_entry(tmp, &mns->keys, hlist) {
/* if we don't own the tag, it can't conflict */
if (tmp->tag & MCTP_HDR_FLAG_TO)
continue;
if ((tmp->peer_addr == daddr ||
tmp->peer_addr == MCTP_ADDR_ANY) &&
tmp->local_addr == saddr)
tagbits &= ~(1 << tmp->tag);
if (!tagbits)
break;
}
if (tagbits) {
key->tag = __ffs(tagbits);
mctp_reserve_tag(net, key, msk);
*tagp = key->tag;
rc = 0;
}
spin_unlock_irqrestore(&mns->keys_lock, flags);
if (!tagbits)
kfree(key);
return rc;
}
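
To make the conflict scan above concrete: the allocator starts from a bitmask of the eight possible tag values, clears any value already used by a locally-owned key for the same (local, peer) pair (or a wildcard peer), and takes the lowest bit still set. A standalone, userspace-style sketch of that strategy with made-up data, not kernel code:

```c
/* Standalone illustration of the tag-allocation strategy above;
 * hypothetical demo data, not kernel code.
 */
#include <stdio.h>
#include <strings.h>	/* ffs() */

struct demo_key {
	unsigned char local;
	unsigned char peer;
	unsigned char tag;	/* locally-owned tags only, values 0..7 */
};

static int alloc_tag(const struct demo_key *keys, int nkeys,
		     unsigned char local, unsigned char peer)
{
	unsigned char tagbits = 0xff;	/* 8 possible tag values, all free */
	int i;

	/* clear any tag that conflicts with an existing key for this pair */
	for (i = 0; i < nkeys; i++) {
		if (keys[i].local == local &&
		    (keys[i].peer == peer || keys[i].peer == 0xff /* ANY */))
			tagbits &= ~(1 << keys[i].tag);
	}

	/* lowest free bit, or -1 if all eight tags are busy */
	return tagbits ? ffs(tagbits) - 1 : -1;
}

int main(void)
{
	struct demo_key keys[] = {
		{ .local = 9, .peer = 8, .tag = 0 },
		{ .local = 9, .peer = 8, .tag = 1 },
	};

	/* tags 0 and 1 conflict, so the next allocation for (9, 8) is 2 */
	printf("allocated tag %d\n", alloc_tag(keys, 2, 9, 8));
	return 0;
}
```
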
/* routing lookups */
static bool mctp_rt_match_eid(struct mctp_route *rt,
unsigned int net, mctp_eid_t eid)
@@ -140,11 +343,13 @@ int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
int mctp_local_output(struct sock *sk, struct mctp_route *rt,
struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
struct mctp_skb_cb *cb = mctp_cb(skb);
struct mctp_hdr *hdr;
unsigned long flags;
mctp_eid_t saddr;
int rc;
u8 tag;
if (WARN_ON(!rt->dev))
return -EINVAL;
@@ -162,6 +367,15 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
if (rc)
return rc;
if (req_tag & MCTP_HDR_FLAG_TO) {
rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
if (rc)
return rc;
tag |= MCTP_HDR_FLAG_TO;
} else {
tag = req_tag;
}
/* TODO: we have the route MTU here; packetise */
skb_reset_transport_header(skb);
@@ -171,8 +385,10 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
hdr->ver = 1;
hdr->dest = daddr;
hdr->src = saddr;
-	hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM; /* TODO */
hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | /* TODO */
tag;
skb->dev = rt->dev->dev;
skb->protocol = htons(ETH_P_MCTP);
skb->priority = 0;
@@ -529,6 +745,10 @@ static int __net_init mctp_routes_net_init(struct net *net)
struct netns_mctp *ns = &net->mctp;
INIT_LIST_HEAD(&ns->routes);
INIT_HLIST_HEAD(&ns->binds);
mutex_init(&ns->bind_lock);
INIT_HLIST_HEAD(&ns->keys);
spin_lock_init(&ns->keys_lock);
return 0;
}
......