Commit d0aa9595 authored by Jeff Garzik's avatar Jeff Garzik

[netdrvr bonding] add 802.3ad support

Contributed by Intel, with updates by
Jay Vosburgh @ IBM (bonding maintainer)
parent eb7040af
#
# Makefile for the Ethernet Bonding driver
#
# O_TARGET names the object (bonding.o) that is also listed in obj-m;
# obj-y lists the three object files that make up the driver.
# NOTE(review): how O_TARGET, obj-y and obj-m are turned into build rules is
# decided by the included Rules.make, which is not visible here - confirm
# there before changing these variables.
#
O_TARGET := bonding.o
obj-y := bond_main.o \
bond_3ad.o \
bond_alb.o
obj-m := $(O_TARGET)
include $(TOPDIR)/Rules.make
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/*
* Copyright(c) 1999 - 2003 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The full GNU General Public License is included in this distribution in the
* file called LICENSE.
*/
#ifndef __BOND_ALB_H__
#define __BOND_ALB_H__
#include <linux/if_ether.h>
/* Forward declarations: this header only uses pointers to these types. */
struct bonding;
struct slave;
/* Accessors: expand to the alb_info member of a bond and to the tlb_info
 * member of a slave, respectively.
 */
#define BOND_ALB_INFO(bond) ((bond)->alb_info)
#define SLAVE_TLB_INFO(slave) ((slave)->tlb_info)
/* -------------------------------------------------------------------------
 * struct tlb_client_info is one entry of the transmit hash table
 * (tx_hashtbl in struct alb_bond_info). The next/prev members are table
 * indices, not pointers.
 * -------------------------------------------------------------------------
 */
struct tlb_client_info {
struct slave *tx_slave; /* A pointer to the slave used for transmitting
* packets to the client that the hash function
* mapped to this entry index.
*/
u32 tx_bytes; /* Each client accumulates the bytes that
* were transmitted to it, and after each
* callback the load history is divided
* by the balance interval.
*/
u32 load_history; /* This field contains the amount of bytes
* that were transmitted to this client by
* the server on the previous balance
* interval in Bps.
*/
u32 next; /* The next hash table entry index, assigned
* to use the same adapter for transmit.
*/
u32 prev; /* The previous hash table entry index,
* assigned to use the same adapter for
* transmit (original comment was cut short;
* wording mirrors 'next' - confirm).
*/
};
/* -------------------------------------------------------------------------
 * struct rlb_client_info contains all info related to a specific rx client
 * connection. This is the clients hash table entry struct (rx_hashtbl in
 * struct alb_bond_info). The next/prev members are table indices, not
 * pointers.
 * NOTE(review): the byte order of ip_src/ip_dst is not visible in this
 * header - confirm against the code that fills them in.
 * -------------------------------------------------------------------------
 */
struct rlb_client_info {
u32 ip_src; /* the server IP address */
u32 ip_dst; /* the client IP address */
u8 mac_dst[ETH_ALEN]; /* the client MAC address */
u32 next; /* The next hash table entry index */
u32 prev; /* The previous hash table entry index */
u8 assigned; /* flag - whether this entry is assigned */
u8 ntt; /* flag - need to transmit client info */
struct slave *slave; /* the slave assigned to this client */
};
/* Per-slave transmit load balancing state; struct slave embeds one of these
 * as its tlb_info member.
 */
struct tlb_slave_info {
u32 head; /* Index to the head of the bi-directional clients
* hash table entries list. The entries in the list
* are the entries that were assigned to use this
* slave for transmit.
*/
u32 load; /* Each slave sums the load_history of all clients
* assigned to it.
*/
};
/* Per-bond load balancing state; struct bonding embeds one of these as its
 * alb_info member. The first group of fields concerns transmit balancing,
 * the fields after the "rlb parameters" marker concern receive balancing.
 */
struct alb_bond_info {
struct timer_list alb_timer;
struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
spinlock_t tx_hashtbl_lock; /* presumably guards tx_hashtbl - confirm
* against the implementation
*/
u32 unbalanced_load;
int tx_rebalance_counter;
int lp_counter; /* NOTE(review): meaning of "lp" is not visible
* in this header - confirm
*/
/* -------- rlb parameters -------- */
int rlb_enabled;
struct packet_type rlb_pkt_type;
struct rlb_client_info *rx_hashtbl; /* Receive hash table */
spinlock_t rx_hashtbl_lock; /* presumably guards rx_hashtbl - confirm
* against the implementation
*/
u32 rx_hashtbl_head;
u8 rx_ntt; /* flag - need to transmit
* to all rx clients
*/
struct slave *next_rx_slave;/* next slave to be assigned
* to a new rx client
*/
u32 rlb_interval_counter;
u8 primary_is_promisc; /* boolean */
u32 rlb_promisc_timeout_counter;/* counts primary
* promiscuity time
*/
u32 rlb_update_delay_counter;
u32 rlb_update_retry_counter;/* counter of retries
* of client update
*/
u8 rlb_rebalance; /* flag - indicates that the
* rx traffic should be
* rebalanced
*/
};
/* Entry points of the load balancing code. Only the prototypes live in this
 * header; the definitions are elsewhere.
 * NOTE(review): return-value conventions of the int-returning functions
 * are not visible here - confirm against the implementation before relying
 * on them.
 */
int bond_alb_initialize(struct bonding *bond, int rlb_enabled);
void bond_alb_deinitialize(struct bonding *bond);
int bond_alb_init_slave(struct bonding *bond, struct slave *slave);
void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave);
void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link);
void bond_alb_assign_current_slave(struct bonding *bond, struct slave *new_slave);
int bond_alb_xmit(struct sk_buff *skb, struct net_device *dev);
void bond_alb_monitor(struct bonding *bond);
#endif /* __BOND_ALB_H__ */
/*
* Bond several ethernet interfaces into a Cisco, running 'Etherchannel'.
*
* Portions are (c) Copyright 1995 Simon "Guru Aleph-Null" Janes
* NCM: Network and Communications Management, Inc.
*
* BUT, I'm the one who modified it for ethernet, so:
* (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov
*
* This software may be used and distributed according to the terms
* of the GNU Public License, incorporated herein by reference.
*
*
* 2003/03/18 - Amir Noam <amir.noam at intel dot com>,
* Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Shmulik Hen <shmulik.hen at intel dot com>
* - Added support for IEEE 802.3ad Dynamic link aggregation mode.
*
* 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Amir Noam <amir.noam at intel dot com>
* - Code beautification and style changes (mainly in comments).
*
* 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com>
* - Added support for Transmit load balancing mode.
*/
#ifndef _LINUX_BONDING_H
#define _LINUX_BONDING_H
#include <linux/timer.h>
#include <linux/proc_fs.h>
#include "bond_3ad.h"
#include "bond_alb.h"
#ifdef BONDING_DEBUG
/*
 * Debug print helper, active only when BONDING_DEBUG is defined.
 *
 * use this like so: BOND_PRINT_DBG(("foo = %d, bar = %d", foo, bar));
 *
 * The argument carries its own pair of parentheses because it is placed
 * directly after printk and therefore has to be a complete argument list.
 * Three separate printk calls are issued: the calling function name and
 * line number at KERN_DEBUG level, the caller's message, and a newline.
 */
#define BOND_PRINT_DBG(X) \
do { \
printk(KERN_DEBUG "%s (%d)", __FUNCTION__, __LINE__); \
printk X; \
printk("\n"); \
} while(0)
#else
/* Without BONDING_DEBUG the macro expands to nothing. */
#define BOND_PRINT_DBG(X)
#endif /* BONDING_DEBUG */
/* True when dev has IFF_UP set in its flags and both netif_running() and
 * netif_carrier_ok() report true for it. The argument is evaluated more
 * than once, so do not pass an expression with side effects.
 */
#define IS_UP(dev) ((((dev)->flags & (IFF_UP)) == (IFF_UP)) && \
(netif_running(dev) && netif_carrier_ok(dev)))
/* Checks whether the dev is ready for transmit. We do not check netif_running
 * since a device can be stopped by the driver for short periods of time for
 * maintenance. dev_queue_xmit() handles this by queueing the packet until
 * the dev is running again. Keeping packet ordering requires sticking to the
 * same dev as much as possible.
 *
 * The slave argument is evaluated more than once, so do not pass an
 * expression with side effects.
 */
#define SLAVE_IS_OK(slave) \
((((slave)->dev->flags & (IFF_UP)) == (IFF_UP)) && \
netif_carrier_ok((slave)->dev) && \
((slave)->link == BOND_LINK_UP) && \
((slave)->state == BOND_STATE_ACTIVE))
/* Per-slave bookkeeping of the bonding driver. Slaves are linked to each
 * other through next/prev; the helpers further down in this header walk
 * that list.
 */
typedef struct slave {
struct slave *next;
struct slave *prev;
struct net_device *dev; /* the net_device this slave entry stands for */
short delay;
unsigned long jiffies; /* NOTE(review): a timestamp by its name; what
* it records is not visible here - confirm
*/
char link; /* one of BOND_LINK_XXXX */
char state; /* one of BOND_STATE_XXXX */
unsigned short original_flags;
u32 link_failure_count;
u16 speed; /* NOTE(review): units and source (ethtool?) are
* not visible here - confirm
*/
u8 duplex;
u8 perm_hwaddr[ETH_ALEN]; /* presumably the slave's own hardware
* address - confirm
*/
struct ad_slave_info ad_info; /* HUGE - better to dynamically alloc */
struct tlb_slave_info tlb_info; /* per-slave TLB state, reached through
* SLAVE_TLB_INFO()
*/
} slave_t;
/*
 * Per-bond state of the bonding driver.
 *
 * Here are the locking policies for the two bonding locks:
 *
 * 1) Get bond->lock when reading/writing slave list.
 * 2) Get bond->ptrlock when reading/writing bond->current_slave.
 * (It is unnecessary when the write-lock is put with bond->lock.)
 * 3) When we lock with bond->ptrlock, we must lock with bond->lock
 * beforehand.
 */
typedef struct bonding {
slave_t *next; /* first slave; equals (slave_t *)bond itself when the
* bond has no slaves (see bond_get_first_slave())
*/
slave_t *prev;
slave_t *current_slave; /* access under ptrlock, see policy above */
slave_t *primary_slave;
slave_t *current_arp_slave;
__s32 slave_cnt;
rwlock_t lock; /* slave list lock, see policy above */
rwlock_t ptrlock; /* current_slave lock, see policy above */
struct timer_list mii_timer;
struct timer_list arp_timer;
struct net_device_stats *stats;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *bond_proc_dir;
struct proc_dir_entry *bond_proc_info_file;
#endif /* CONFIG_PROC_FS */
struct bonding *next_bond; /* presumably links all bonds into one
* list - confirm
*/
struct net_device *device;
struct dev_mc_list *mc_list;
unsigned short flags;
struct ad_bond_info ad_info;
struct alb_bond_info alb_info; /* reached through BOND_ALB_INFO() */
} bonding_t;
/* Forward declarations; the definitions are not part of this header. */
void bond_set_slave_active_flags(slave_t *slave);
void bond_set_slave_inactive_flags(slave_t *slave);
/**
 * These functions can be used for iterating the slave list
 * (which is circular)
 * Caller must hold bond lock for read
 */

/**
 * bond_get_first_slave - return the first slave of a bond
 * @bond: bond whose slave list is inspected
 *
 * Returns bond->next, or NULL when bond->next points back at the bond
 * itself, which is how a bond without slaves is represented.
 *
 * Declared static inline: a header-defined extern inline function either
 * leaves an unresolved symbol when the compiler declines to inline it
 * (GNU89 semantics) or produces one external definition per including
 * file (C99 semantics). static inline is safe under both.
 */
static inline struct slave *
bond_get_first_slave(struct bonding *bond)
{
	/* if there are no slaves return NULL */
	if (bond->next == (slave_t *)bond) {
		return NULL;
	}

	return bond->next;
}
/**
 * bond_get_next_slave - step to the slave following @slave
 * @bond: bond that owns the slave list
 * @slave: current position in the list; must not be NULL
 *
 * Returns slave->next, or NULL when slave->next is bond->next again,
 * i.e. when the walk would wrap around to where iteration started.
 *
 * Caller must hold bond lock for read
 *
 * Declared static inline: a header-defined extern inline function either
 * leaves an unresolved symbol when the compiler declines to inline it
 * (GNU89 semantics) or produces one external definition per including
 * file (C99 semantics). static inline is safe under both.
 */
static inline struct slave *
bond_get_next_slave(struct bonding *bond, struct slave *slave)
{
	/* If we have reached the last slave return NULL */
	if (slave->next == bond->next) {
		return NULL;
	}

	return slave->next;
}
/**
 * bond_get_slave_by_dev - find the slave entry that refers to a net_device
 * @bond: bond whose slave list is searched
 * @slave_dev: device to look for
 *
 * Walks the slave list starting at bond->next and compares each slave's
 * dev pointer with @slave_dev.
 *
 * Returns the matching slave, or NULL if the net_device does not belong to
 * any of the bond's slaves (including the case of a bond with no slaves).
 *
 * Caller must hold bond lock for read
 *
 * Declared static inline: a header-defined extern inline function either
 * leaves an unresolved symbol when the compiler declines to inline it
 * (GNU89 semantics) or produces one external definition per including
 * file (C99 semantics). static inline is safe under both.
 */
static inline struct slave *
bond_get_slave_by_dev(struct bonding *bond, struct net_device *slave_dev)
{
	struct slave *our_slave = bond->next;

	/* check if the list of slaves is empty */
	if (our_slave == (slave_t *)bond) {
		return NULL;
	}

	/*
	 * bond_get_next_slave() yields NULL after the last slave, so the
	 * loop ends with our_slave == NULL when nothing matched.
	 */
	for (; our_slave; our_slave = bond_get_next_slave(bond, our_slave)) {
		if (our_slave->dev == slave_dev) {
			break;
		}
	}

	return our_slave;
}
/**
 * bond_get_bond_by_slave - map a slave back to its bond
 * @slave: slave entry, may be NULL
 *
 * Returns the priv pointer of the slave device's master, cast to
 * struct bonding *, or NULL when @slave is NULL or its device has no
 * master. slave->dev itself is dereferenced unconditionally, so a non-NULL
 * @slave must carry a valid dev pointer.
 *
 * Declared static inline: a header-defined extern inline function either
 * leaves an unresolved symbol when the compiler declines to inline it
 * (GNU89 semantics) or produces one external definition per including
 * file (C99 semantics). static inline is safe under both.
 */
static inline struct bonding *
bond_get_bond_by_slave(struct slave *slave)
{
	if (!slave || !slave->dev->master) {
		return NULL;
	}

	return (struct bonding *)(slave->dev->master->priv);
}
#endif /* _LINUX_BONDING_H */
...@@ -11,18 +11,38 @@ ...@@ -11,18 +11,38 @@
* This software may be used and distributed according to the terms * This software may be used and distributed according to the terms
* of the GNU Public License, incorporated herein by reference. * of the GNU Public License, incorporated herein by reference.
* *
* 2003/03/18 - Amir Noam <amir.noam at intel dot com>
* - Added support for getting slave's speed and duplex via ethtool.
* Needed for 802.3ad and other future modes.
*
* 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Shmulik Hen <shmulik.hen at intel dot com>
* - Enable support of modes that need to use the unique mac address of
* each slave.
*
* 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Amir Noam <amir.noam at intel dot com>
* - Moved driver's private data types to bonding.h
*
* 2003/03/18 - Amir Noam <amir.noam at intel dot com>,
* Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Shmulik Hen <shmulik.hen at intel dot com>
* - Added support for IEEE 802.3ad Dynamic link aggregation mode.
*
* 2003/05/01 - Amir Noam <amir.noam at intel dot com>
* - Added ABI version control to restore compatibility between
* new/old ifenslave and new/old bonding.
*/ */
#ifndef _LINUX_IF_BONDING_H #ifndef _LINUX_IF_BONDING_H
#define _LINUX_IF_BONDING_H #define _LINUX_IF_BONDING_H
#ifdef __KERNEL__
#include <linux/timer.h>
#include <linux/if.h> #include <linux/if.h>
#include <linux/proc_fs.h>
#endif /* __KERNEL__ */
#include <linux/types.h> #include <linux/types.h>
#include <linux/if_ether.h>
/* userland - kernel ABI version (2003/05/08) */
#define BOND_ABI_VERSION 1
/* /*
* We can remove these ioctl definitions in 2.5. People should use the * We can remove these ioctl definitions in 2.5. People should use the
...@@ -41,6 +61,9 @@ ...@@ -41,6 +61,9 @@
#define BOND_MODE_ACTIVEBACKUP 1 #define BOND_MODE_ACTIVEBACKUP 1
#define BOND_MODE_XOR 2 #define BOND_MODE_XOR 2
#define BOND_MODE_BROADCAST 3 #define BOND_MODE_BROADCAST 3
#define BOND_MODE_8023AD 4
#define BOND_MODE_TLB 5
#define BOND_MODE_ALB 6 /* TLB + RLB (receive load balancing) */
/* each slave's link has 4 states */ /* each slave's link has 4 states */
#define BOND_LINK_UP 0 /* link is up and running */ #define BOND_LINK_UP 0 /* link is up and running */
...@@ -58,11 +81,6 @@ ...@@ -58,11 +81,6 @@
#define BOND_MULTICAST_ACTIVE 1 #define BOND_MULTICAST_ACTIVE 1
#define BOND_MULTICAST_ALL 2 #define BOND_MULTICAST_ALL 2
struct bond_parm_tbl {
char *modename;
int mode;
};
typedef struct ifbond { typedef struct ifbond {
__s32 bond_mode; __s32 bond_mode;
__s32 num_slaves; __s32 num_slaves;
...@@ -78,52 +96,15 @@ typedef struct ifslave ...@@ -78,52 +96,15 @@ typedef struct ifslave
__u32 link_failure_count; __u32 link_failure_count;
} ifslave; } ifslave;
#ifdef __KERNEL__ struct ad_info {
typedef struct slave { __u16 aggregator_id;
struct slave *next; __u16 ports;
struct slave *prev; __u16 actor_key;
struct net_device *dev; __u16 partner_key;
short delay; __u8 partner_system[ETH_ALEN];
unsigned long jiffies; };
char link; /* one of BOND_LINK_XXXX */
char state; /* one of BOND_STATE_XXXX */
unsigned short original_flags;
u32 link_failure_count;
} slave_t;
/*
* Here are the locking policies for the two bonding locks:
*
* 1) Get bond->lock when reading/writing slave list.
* 2) Get bond->ptrlock when reading/writing bond->current_slave.
* (It is unnecessary when the write-lock is put with bond->lock.)
* 3) When we lock with bond->ptrlock, we must lock with bond->lock
* beforehand.
*/
typedef struct bonding {
slave_t *next;
slave_t *prev;
slave_t *current_slave;
slave_t *primary_slave;
slave_t *current_arp_slave;
__s32 slave_cnt;
rwlock_t lock;
rwlock_t ptrlock;
struct timer_list mii_timer;
struct timer_list arp_timer;
struct net_device_stats *stats;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *bond_proc_dir;
struct proc_dir_entry *bond_proc_info_file;
#endif /* CONFIG_PROC_FS */
struct bonding *next_bond;
struct net_device *device;
struct dev_mc_list *mc_list;
unsigned short flags;
} bonding_t;
#endif /* __KERNEL__ */
#endif /* _LINUX_BOND_H */ #endif /* _LINUX_IF_BONDING_H */
/* /*
* Local variables: * Local variables:
......
...@@ -148,6 +148,7 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, ...@@ -148,6 +148,7 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb,
{ {
struct net_device_stats *stats; struct net_device_stats *stats;
skb->real_dev = skb->dev;
skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK]; skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK];
if (skb->dev == NULL) { if (skb->dev == NULL) {
kfree_skb(skb); kfree_skb(skb);
......
...@@ -190,6 +190,7 @@ struct sk_buff { ...@@ -190,6 +190,7 @@ struct sk_buff {
struct sock *sk; struct sock *sk;
struct timeval stamp; struct timeval stamp;
struct net_device *dev; struct net_device *dev;
struct net_device *real_dev;
union { union {
struct tcphdr *th; struct tcphdr *th;
......
...@@ -1385,8 +1385,10 @@ static __inline__ void skb_bond(struct sk_buff *skb) ...@@ -1385,8 +1385,10 @@ static __inline__ void skb_bond(struct sk_buff *skb)
{ {
struct net_device *dev = skb->dev; struct net_device *dev = skb->dev;
if (dev->master) if (dev->master) {
skb->real_dev = skb->dev;
skb->dev = dev->master; skb->dev = dev->master;
}
} }
static void net_tx_action(struct softirq_action *h) static void net_tx_action(struct softirq_action *h)
......
...@@ -271,6 +271,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) ...@@ -271,6 +271,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
n->sk = NULL; n->sk = NULL;
C(stamp); C(stamp);
C(dev); C(dev);
C(real_dev);
C(h); C(h);
C(nh); C(nh);
C(mac); C(mac);
...@@ -334,6 +335,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) ...@@ -334,6 +335,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->list = NULL; new->list = NULL;
new->sk = NULL; new->sk = NULL;
new->dev = old->dev; new->dev = old->dev;
new->real_dev = old->real_dev;
new->priority = old->priority; new->priority = old->priority;
new->protocol = old->protocol; new->protocol = old->protocol;
new->dst = dst_clone(old->dst); new->dst = dst_clone(old->dst);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment