Commit 684a95c0 authored by David S. Miller's avatar David S. Miller

Merge branch 'mlxsw-ipv4-unicast-routing'

Jiri Pirko says:

====================
mlxsw: Implement IPV4 unicast routing

This patchset enables IPv4 unicast routing in the Mellanox Spectrum ASIC
switch driver. This builds upon the work that was done by a couple of
previous patchsets.

Patches 1,2,6 add a couple of dependencies outside the driver. Namely, the
ability to propagate ndo_neigh_construct()/destroy() through stacked devices and
a notification whenever DELAY_PROBE_TIME changes. When propagated down, the
ndos allow drivers to add and remove neighbour entries from their private
neighbour table. The DELAY_PROBE_TIME notification gives drivers the ability to
correctly configure their polling interval for neighbour activity, so that
active neighbour won't be marked as STALE.

Patches 3-5,7-8 add the neighbour offloading infrastructure, where patch 7 uses
the DELAY_PROBE_TIME notification in order to correctly configure the device's
polling interval. Patch 8 finally programs neighbours to the device's table
based on NEIGH_UPDATE notifications, so that directly connected routes can
be used.

Patches 9-16 build upon the previous patches and extend the router with
remote routes (nexthop) support.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 9046a745 0b2361d9
......@@ -4137,6 +4137,8 @@ static const struct net_device_ops bond_netdev_ops = {
.ndo_add_slave = bond_enslave,
.ndo_del_slave = bond_release,
.ndo_fix_features = bond_fix_features,
.ndo_neigh_construct = netdev_default_l2upper_neigh_construct,
.ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy,
.ndo_bridge_setlink = switchdev_port_bridge_setlink,
.ndo_bridge_getlink = switchdev_port_bridge_getlink,
.ndo_bridge_dellink = switchdev_port_bridge_dellink,
......
......@@ -7,5 +7,6 @@ obj-$(CONFIG_MLXSW_SWITCHX2) += mlxsw_switchx2.o
mlxsw_switchx2-objs := switchx2.o
obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o
mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
spectrum_switchdev.o spectrum_router.o
spectrum_switchdev.o spectrum_router.o \
spectrum_kvdl.o
mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
......@@ -607,6 +607,24 @@ MLXSW_ITEM32(cmd_mbox, config_profile,
*/
MLXSW_ITEM32(cmd_mbox, config_profile, set_ar_sec, 0x0C, 15, 1);
/* cmd_mbox_config_set_kvd_linear_size
* Capability bit. Setting a bit to 1 configures the profile
* according to the mailbox contents.
*/
MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_linear_size, 0x0C, 24, 1);
/* cmd_mbox_config_set_kvd_hash_single_size
* Capability bit. Setting a bit to 1 configures the profile
* according to the mailbox contents.
*/
MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_single_size, 0x0C, 25, 1);
/* cmd_mbox_config_set_kvd_hash_double_size
* Capability bit. Setting a bit to 1 configures the profile
* according to the mailbox contents.
*/
MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_double_size, 0x0C, 26, 1);
/* cmd_mbox_config_profile_max_vepa_channels
* Maximum number of VEPA channels per port (0 through 16)
* 0 - multi-channel VEPA is disabled
......@@ -733,6 +751,31 @@ MLXSW_ITEM32(cmd_mbox, config_profile, adaptive_routing_group_cap, 0x4C, 0, 16);
*/
MLXSW_ITEM32(cmd_mbox, config_profile, arn, 0x50, 31, 1);
/* cmd_mbox_config_kvd_linear_size
* KVD Linear Size
* Valid for Spectrum only
* Allowed values are 128*N where N=0 or higher
*/
MLXSW_ITEM32(cmd_mbox, config_profile, kvd_linear_size, 0x54, 0, 24);
/* cmd_mbox_config_kvd_hash_single_size
* KVD Hash single-entries size
* Valid for Spectrum only
* Allowed values are 128*N where N=0 or higher
* Must be greater or equal to cap_min_kvd_hash_single_size
* Must be smaller or equal to cap_kvd_size - kvd_linear_size
*/
MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_single_size, 0x58, 0, 24);
/* cmd_mbox_config_kvd_hash_double_size
* KVD Hash double-entries size (units of single-size entries)
* Valid for Spectrum only
* Allowed values are 128*N where N=0 or higher
* Must be either 0 or greater or equal to cap_min_kvd_hash_double_size
* Must be smaller or equal to cap_kvd_size - kvd_linear_size
*/
MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_double_size, 0x5C, 0, 24);
/* cmd_mbox_config_profile_swid_config_mask
* Modify Switch Partition Configuration mask. When set, the configu-
* ration value for the Switch Partition are taken from the mailbox.
......
......@@ -190,7 +190,8 @@ struct mlxsw_config_profile {
used_max_ib_mc:1,
used_max_pkey:1,
used_ar_sec:1,
used_adaptive_routing_group_cap:1;
used_adaptive_routing_group_cap:1,
used_kvd_sizes:1;
u8 max_vepa_channels;
u16 max_lag;
u16 max_port_per_lag;
......@@ -211,6 +212,9 @@ struct mlxsw_config_profile {
u8 ar_sec;
u16 adaptive_routing_group_cap;
u8 arn;
u32 kvd_linear_size;
u32 kvd_hash_single_size;
u32 kvd_hash_double_size;
struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT];
};
......
......@@ -1255,6 +1255,20 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set(
mbox, profile->adaptive_routing_group_cap);
}
if (profile->used_kvd_sizes) {
mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(
mbox, 1);
mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(
mbox, profile->kvd_linear_size);
mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(
mbox, 1);
mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(
mbox, profile->kvd_hash_single_size);
mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set(
mbox, 1);
mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(
mbox, profile->kvd_hash_double_size);
}
for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++)
mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i,
......
......@@ -4,6 +4,7 @@
* Copyright (c) 2015-2016 Ido Schimmel <idosch@mellanox.com>
* Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
* Copyright (c) 2015-2016 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......@@ -3454,6 +3455,137 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
}
/* RATR - Router Adjacency Table Register
* --------------------------------------
* The RATR register is used to configure the Router Adjacency (next-hop)
* Table.
*/
#define MLXSW_REG_RATR_ID 0x8008
#define MLXSW_REG_RATR_LEN 0x2C
static const struct mlxsw_reg_info mlxsw_reg_ratr = {
.id = MLXSW_REG_RATR_ID,
.len = MLXSW_REG_RATR_LEN,
};
enum mlxsw_reg_ratr_op {
/* Read */
MLXSW_REG_RATR_OP_QUERY_READ = 0,
/* Read and clear activity */
MLXSW_REG_RATR_OP_QUERY_READ_CLEAR = 2,
/* Write Adjacency entry */
MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY = 1,
/* Write Adjacency entry only if the activity is cleared.
* The write may not succeed if the activity is set. There is not
* direct feedback if the write has succeeded or not, however
* the get will reveal the actual entry (SW can compare the get
* response to the set command).
*/
MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY = 3,
};
/* reg_ratr_op
* Note that Write operation may also be used for updating
* counter_set_type and counter_index. In this case all other
* fields must not be updated.
* Access: OP
*/
MLXSW_ITEM32(reg, ratr, op, 0x00, 28, 4);
/* reg_ratr_v
* Valid bit. Indicates if the adjacency entry is valid.
* Note: the device may need some time before reusing an invalidated
* entry. During this time the entry can not be reused. It is
* recommended to use another entry before reusing an invalidated
* entry (e.g. software can put it at the end of the list for
* reusing). Trying to access an invalidated entry not yet cleared
* by the device results with failure indicating "Try Again" status.
* When valid is '0' then egress_router_interface,trap_action,
* adjacency_parameters and counters are reserved
* Access: RW
*/
MLXSW_ITEM32(reg, ratr, v, 0x00, 24, 1);
/* reg_ratr_a
* Activity. Set for new entries. Set if a packet lookup has hit on
* the specific entry. To clear the a bit, use "clear activity".
* Access: RO
*/
MLXSW_ITEM32(reg, ratr, a, 0x00, 16, 1);
/* reg_ratr_adjacency_index_low
* Bits 15:0 of index into the adjacency table.
* For SwitchX and SwitchX-2, the adjacency table is linear and
* used for adjacency entries only.
* For Spectrum, the index is to the KVD linear.
* Access: Index
*/
MLXSW_ITEM32(reg, ratr, adjacency_index_low, 0x04, 0, 16);
/* reg_ratr_egress_router_interface
* Range is 0 .. cap_max_router_interfaces - 1
* Access: RW
*/
MLXSW_ITEM32(reg, ratr, egress_router_interface, 0x08, 0, 16);
enum mlxsw_reg_ratr_trap_action {
MLXSW_REG_RATR_TRAP_ACTION_NOP,
MLXSW_REG_RATR_TRAP_ACTION_TRAP,
MLXSW_REG_RATR_TRAP_ACTION_MIRROR_TO_CPU,
MLXSW_REG_RATR_TRAP_ACTION_MIRROR,
MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS,
};
/* reg_ratr_trap_action
* see mlxsw_reg_ratr_trap_action
* Access: RW
*/
MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4);
enum mlxsw_reg_ratr_trap_id {
MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0 = 0,
MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1 = 1,
};
/* reg_ratr_adjacency_index_high
* Bits 23:16 of the adjacency_index.
* Access: Index
*/
MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8);
/* reg_ratr_trap_id
* Trap ID to be reported to CPU.
* Trap-ID is RTR_EGRESS0 or RTR_EGRESS1.
* For trap_action of NOP, MIRROR and DISCARD_ERROR
* Access: RW
*/
MLXSW_ITEM32(reg, ratr, trap_id, 0x0C, 0, 8);
/* reg_ratr_eth_destination_mac
* MAC address of the destination next-hop.
* Access: RW
*/
MLXSW_ITEM_BUF(reg, ratr, eth_destination_mac, 0x12, 6);
static inline void
mlxsw_reg_ratr_pack(char *payload,
enum mlxsw_reg_ratr_op op, bool valid,
u32 adjacency_index, u16 egress_rif)
{
MLXSW_REG_ZERO(ratr, payload);
mlxsw_reg_ratr_op_set(payload, op);
mlxsw_reg_ratr_v_set(payload, valid);
mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index);
mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16);
mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif);
}
static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload,
const char *dest_mac)
{
mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac);
}
/* RALTA - Router Algorithmic LPM Tree Allocation Register
* -------------------------------------------------------
* RALTA is used to allocate the LPM trees of the SHSPM method.
......@@ -3884,6 +4016,356 @@ mlxsw_reg_ralue_act_ip2me_pack(char *payload)
MLXSW_REG_RALUE_ACTION_TYPE_IP2ME);
}
/* RAUHT - Router Algorithmic LPM Unicast Host Table Register
* ----------------------------------------------------------
* The RAUHT register is used to configure and query the Unicast Host table in
* devices that implement the Algorithmic LPM.
*/
#define MLXSW_REG_RAUHT_ID 0x8014
#define MLXSW_REG_RAUHT_LEN 0x74
static const struct mlxsw_reg_info mlxsw_reg_rauht = {
.id = MLXSW_REG_RAUHT_ID,
.len = MLXSW_REG_RAUHT_LEN,
};
enum mlxsw_reg_rauht_type {
MLXSW_REG_RAUHT_TYPE_IPV4,
MLXSW_REG_RAUHT_TYPE_IPV6,
};
/* reg_rauht_type
* Access: Index
*/
MLXSW_ITEM32(reg, rauht, type, 0x00, 24, 2);
enum mlxsw_reg_rauht_op {
MLXSW_REG_RAUHT_OP_QUERY_READ = 0,
/* Read operation */
MLXSW_REG_RAUHT_OP_QUERY_CLEAR_ON_READ = 1,
/* Clear on read operation. Used to read entry and clear
* activity bit.
*/
MLXSW_REG_RAUHT_OP_WRITE_ADD = 0,
/* Add. Used to write a new entry to the table. All R/W fields are
* relevant for new entry. Activity bit is set for new entries.
*/
MLXSW_REG_RAUHT_OP_WRITE_UPDATE = 1,
/* Update action. Used to update an existing route entry and
* only update the following fields:
* trap_action, trap_id, mac, counter_set_type, counter_index
*/
MLXSW_REG_RAUHT_OP_WRITE_CLEAR_ACTIVITY = 2,
/* Clear activity. A bit is cleared for the entry. */
MLXSW_REG_RAUHT_OP_WRITE_DELETE = 3,
/* Delete entry */
MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL = 4,
/* Delete all host entries on a RIF. In this command, dip
* field is reserved.
*/
};
/* reg_rauht_op
* Access: OP
*/
MLXSW_ITEM32(reg, rauht, op, 0x00, 20, 3);
/* reg_rauht_a
* Activity. Set for new entries. Set if a packet lookup has hit on
* the specific entry.
* To clear the a bit, use "clear activity" op.
* Enabled by activity_dis in RGCR
* Access: RO
*/
MLXSW_ITEM32(reg, rauht, a, 0x00, 16, 1);
/* reg_rauht_rif
* Router Interface
* Access: Index
*/
MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16);
/* reg_rauht_dip*
* Destination address.
* Access: Index
*/
MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32);
enum mlxsw_reg_rauht_trap_action {
MLXSW_REG_RAUHT_TRAP_ACTION_NOP,
MLXSW_REG_RAUHT_TRAP_ACTION_TRAP,
MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR_TO_CPU,
MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR,
MLXSW_REG_RAUHT_TRAP_ACTION_DISCARD_ERRORS,
};
/* reg_rauht_trap_action
* Access: RW
*/
MLXSW_ITEM32(reg, rauht, trap_action, 0x60, 28, 4);
enum mlxsw_reg_rauht_trap_id {
MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS0,
MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS1,
};
/* reg_rauht_trap_id
* Trap ID to be reported to CPU.
* Trap-ID is RTR_EGRESS0 or RTR_EGRESS1.
* For trap_action of NOP, MIRROR and DISCARD_ERROR,
* trap_id is reserved.
* Access: RW
*/
MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9);
/* reg_rauht_counter_set_type
* Counter set type for flow counters
* Access: RW
*/
MLXSW_ITEM32(reg, rauht, counter_set_type, 0x68, 24, 8);
/* reg_rauht_counter_index
* Counter index for flow counters
* Access: RW
*/
MLXSW_ITEM32(reg, rauht, counter_index, 0x68, 0, 24);
/* reg_rauht_mac
* MAC address.
* Access: RW
*/
MLXSW_ITEM_BUF(reg, rauht, mac, 0x6E, 6);
static inline void mlxsw_reg_rauht_pack(char *payload,
enum mlxsw_reg_rauht_op op, u16 rif,
const char *mac)
{
MLXSW_REG_ZERO(rauht, payload);
mlxsw_reg_rauht_op_set(payload, op);
mlxsw_reg_rauht_rif_set(payload, rif);
mlxsw_reg_rauht_mac_memcpy_to(payload, mac);
}
static inline void mlxsw_reg_rauht_pack4(char *payload,
enum mlxsw_reg_rauht_op op, u16 rif,
const char *mac, u32 dip)
{
mlxsw_reg_rauht_pack(payload, op, rif, mac);
mlxsw_reg_rauht_dip4_set(payload, dip);
}
/* RALEU - Router Algorithmic LPM ECMP Update Register
* ---------------------------------------------------
* The register enables updating the ECMP section in the action for multiple
* LPM Unicast entries in a single operation. The update is executed to
* all entries of a {virtual router, protocol} tuple using the same ECMP group.
*/
#define MLXSW_REG_RALEU_ID 0x8015
#define MLXSW_REG_RALEU_LEN 0x28
static const struct mlxsw_reg_info mlxsw_reg_raleu = {
.id = MLXSW_REG_RALEU_ID,
.len = MLXSW_REG_RALEU_LEN,
};
/* reg_raleu_protocol
* Protocol.
* Access: Index
*/
MLXSW_ITEM32(reg, raleu, protocol, 0x00, 24, 4);
/* reg_raleu_virtual_router
* Virtual Router ID
* Range is 0..cap_max_virtual_routers-1
* Access: Index
*/
MLXSW_ITEM32(reg, raleu, virtual_router, 0x00, 0, 16);
/* reg_raleu_adjacency_index
* Adjacency Index used for matching on the existing entries.
* Access: Index
*/
MLXSW_ITEM32(reg, raleu, adjacency_index, 0x10, 0, 24);
/* reg_raleu_ecmp_size
* ECMP Size used for matching on the existing entries.
* Access: Index
*/
MLXSW_ITEM32(reg, raleu, ecmp_size, 0x14, 0, 13);
/* reg_raleu_new_adjacency_index
* New Adjacency Index.
* Access: WO
*/
MLXSW_ITEM32(reg, raleu, new_adjacency_index, 0x20, 0, 24);
/* reg_raleu_new_ecmp_size
* New ECMP Size.
* Access: WO
*/
MLXSW_ITEM32(reg, raleu, new_ecmp_size, 0x24, 0, 13);
static inline void mlxsw_reg_raleu_pack(char *payload,
enum mlxsw_reg_ralxx_protocol protocol,
u16 virtual_router,
u32 adjacency_index, u16 ecmp_size,
u32 new_adjacency_index,
u16 new_ecmp_size)
{
MLXSW_REG_ZERO(raleu, payload);
mlxsw_reg_raleu_protocol_set(payload, protocol);
mlxsw_reg_raleu_virtual_router_set(payload, virtual_router);
mlxsw_reg_raleu_adjacency_index_set(payload, adjacency_index);
mlxsw_reg_raleu_ecmp_size_set(payload, ecmp_size);
mlxsw_reg_raleu_new_adjacency_index_set(payload, new_adjacency_index);
mlxsw_reg_raleu_new_ecmp_size_set(payload, new_ecmp_size);
}
/* RAUHTD - Router Algorithmic LPM Unicast Host Table Dump Register
* ----------------------------------------------------------------
* The RAUHTD register allows dumping entries from the Router Unicast Host
* Table. For a given session an entry is dumped no more than one time. The
* first RAUHTD access after reset is a new session. A session ends when the
* num_rec response is smaller than num_rec request or for IPv4 when the
* num_entries is smaller than 4. The clear activity affect the current session
* or the last session if a new session has not started.
*/
#define MLXSW_REG_RAUHTD_ID 0x8018
#define MLXSW_REG_RAUHTD_BASE_LEN 0x20
#define MLXSW_REG_RAUHTD_REC_LEN 0x20
#define MLXSW_REG_RAUHTD_REC_MAX_NUM 32
#define MLXSW_REG_RAUHTD_LEN (MLXSW_REG_RAUHTD_BASE_LEN + \
MLXSW_REG_RAUHTD_REC_MAX_NUM * MLXSW_REG_RAUHTD_REC_LEN)
#define MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC 4
static const struct mlxsw_reg_info mlxsw_reg_rauhtd = {
.id = MLXSW_REG_RAUHTD_ID,
.len = MLXSW_REG_RAUHTD_LEN,
};
#define MLXSW_REG_RAUHTD_FILTER_A BIT(0)
#define MLXSW_REG_RAUHTD_FILTER_RIF BIT(3)
/* reg_rauhtd_filter_fields
* if a bit is '0' then the relevant field is ignored and dump is done
* regardless of the field value
* Bit0 - filter by activity: entry_a
* Bit3 - filter by entry rip: entry_rif
* Access: Index
*/
MLXSW_ITEM32(reg, rauhtd, filter_fields, 0x00, 0, 8);
enum mlxsw_reg_rauhtd_op {
MLXSW_REG_RAUHTD_OP_DUMP,
MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR,
};
/* reg_rauhtd_op
* Access: OP
*/
MLXSW_ITEM32(reg, rauhtd, op, 0x04, 24, 2);
/* reg_rauhtd_num_rec
* At request: number of records requested
* At response: number of records dumped
* For IPv4, each record has 4 entries at request and up to 4 entries
* at response
* Range is 0..MLXSW_REG_RAUHTD_REC_MAX_NUM
* Access: Index
*/
MLXSW_ITEM32(reg, rauhtd, num_rec, 0x04, 0, 8);
/* reg_rauhtd_entry_a
* Dump only if activity has value of entry_a
* Reserved if filter_fields bit0 is '0'
* Access: Index
*/
MLXSW_ITEM32(reg, rauhtd, entry_a, 0x08, 16, 1);
enum mlxsw_reg_rauhtd_type {
MLXSW_REG_RAUHTD_TYPE_IPV4,
MLXSW_REG_RAUHTD_TYPE_IPV6,
};
/* reg_rauhtd_type
* Dump only if record type is:
* 0 - IPv4
* 1 - IPv6
* Access: Index
*/
MLXSW_ITEM32(reg, rauhtd, type, 0x08, 0, 4);
/* reg_rauhtd_entry_rif
* Dump only if RIF has value of entry_rif
* Reserved if filter_fields bit3 is '0'
* Access: Index
*/
MLXSW_ITEM32(reg, rauhtd, entry_rif, 0x0C, 0, 16);
static inline void mlxsw_reg_rauhtd_pack(char *payload,
enum mlxsw_reg_rauhtd_type type)
{
MLXSW_REG_ZERO(rauhtd, payload);
mlxsw_reg_rauhtd_filter_fields_set(payload, MLXSW_REG_RAUHTD_FILTER_A);
mlxsw_reg_rauhtd_op_set(payload, MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR);
mlxsw_reg_rauhtd_num_rec_set(payload, MLXSW_REG_RAUHTD_REC_MAX_NUM);
mlxsw_reg_rauhtd_entry_a_set(payload, 1);
mlxsw_reg_rauhtd_type_set(payload, type);
}
/* reg_rauhtd_ipv4_rec_num_entries
* Number of valid entries in this record:
* 0 - 1 valid entry
* 1 - 2 valid entries
* 2 - 3 valid entries
* 3 - 4 valid entries
* Access: RO
*/
MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_rec_num_entries,
MLXSW_REG_RAUHTD_BASE_LEN, 28, 2,
MLXSW_REG_RAUHTD_REC_LEN, 0x00, false);
/* reg_rauhtd_rec_type
* Record type.
* 0 - IPv4
* 1 - IPv6
* Access: RO
*/
MLXSW_ITEM32_INDEXED(reg, rauhtd, rec_type, MLXSW_REG_RAUHTD_BASE_LEN, 24, 2,
MLXSW_REG_RAUHTD_REC_LEN, 0x00, false);
#define MLXSW_REG_RAUHTD_IPV4_ENT_LEN 0x8
/* reg_rauhtd_ipv4_ent_a
* Activity. Set for new entries. Set if a packet lookup has hit on the
* specific entry.
* Access: RO
*/
MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1,
MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false);
/* reg_rauhtd_ipv4_ent_rif
* Router interface.
* Access: RO
*/
MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0,
16, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false);
/* reg_rauhtd_ipv4_ent_dip
* Destination IPv4 address.
* Access: RO
*/
MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0,
32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false);
static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload,
int ent_index, u16 *p_rif,
u32 *p_dip)
{
*p_rif = mlxsw_reg_rauhtd_ipv4_ent_rif_get(payload, ent_index);
*p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index);
}
/* MFCR - Management Fan Control Register
* --------------------------------------
* This register controls the settings of the Fan Speed PWM mechanism.
......@@ -4626,6 +5108,8 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id)
return "RGCR";
case MLXSW_REG_RITR_ID:
return "RITR";
case MLXSW_REG_RATR_ID:
return "RATR";
case MLXSW_REG_RALTA_ID:
return "RALTA";
case MLXSW_REG_RALST_ID:
......@@ -4634,6 +5118,12 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id)
return "RALTB";
case MLXSW_REG_RALUE_ID:
return "RALUE";
case MLXSW_REG_RAUHT_ID:
return "RAUHT";
case MLXSW_REG_RALEU_ID:
return "RALEU";
case MLXSW_REG_RAUHTD_ID:
return "RAUHTD";
case MLXSW_REG_MFCR_ID:
return "MFCR";
case MLXSW_REG_MFSC_ID:
......
......@@ -803,6 +803,8 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
.ndo_get_stats64 = mlxsw_sp_port_get_stats64,
.ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid,
.ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid,
.ndo_neigh_construct = mlxsw_sp_router_neigh_construct,
.ndo_neigh_destroy = mlxsw_sp_router_neigh_destroy,
.ndo_fdb_add = switchdev_port_fdb_add,
.ndo_fdb_del = switchdev_port_fdb_del,
.ndo_fdb_dump = switchdev_port_fdb_dump,
......@@ -2354,6 +2356,10 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = {
.max_ib_mc = 0,
.used_max_pkey = 1,
.max_pkey = 0,
.used_kvd_sizes = 1,
.kvd_linear_size = MLXSW_SP_KVD_LINEAR_SIZE,
.kvd_hash_single_size = MLXSW_SP_KVD_HASH_SINGLE_SIZE,
.kvd_hash_double_size = MLXSW_SP_KVD_HASH_DOUBLE_SIZE,
.swid_config = {
{
.used_type = 1,
......
......@@ -39,6 +39,7 @@
#include <linux/types.h>
#include <linux/netdevice.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/if_vlan.h>
#include <linux/list.h>
......@@ -75,6 +76,10 @@
#define MLXSW_SP_BYTES_TO_CELLS(b) DIV_ROUND_UP(b, MLXSW_SP_BYTES_PER_CELL)
#define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL)
#define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */
#define MLXSW_SP_KVD_HASH_SINGLE_SIZE 163840 /* entries */
#define MLXSW_SP_KVD_HASH_DOUBLE_SIZE 32768 /* entries */
/* Maximum delay buffer needed in case of PAUSE frames, in cells.
* Assumes 100m cable and maximum MTU.
*/
......@@ -212,6 +217,15 @@ struct mlxsw_sp_vr {
struct mlxsw_sp_router {
struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT];
struct mlxsw_sp_vr vrs[MLXSW_SP_VIRTUAL_ROUTER_MAX];
struct rhashtable neigh_ht;
struct {
struct delayed_work dw;
unsigned long interval; /* ms */
} neighs_update;
struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
struct list_head nexthop_group_list;
struct list_head nexthop_neighs_list;
};
struct mlxsw_sp {
......@@ -243,6 +257,9 @@ struct mlxsw_sp {
u8 port_to_module[MLXSW_PORT_MAX_PORTS];
struct mlxsw_sp_sb sb;
struct mlxsw_sp_router router;
struct {
DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE);
} kvdl;
};
static inline struct mlxsw_sp_upper *
......@@ -524,5 +541,12 @@ int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port,
struct switchdev_trans *trans);
int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port,
const struct switchdev_obj_ipv4_fib *fib4);
int mlxsw_sp_router_neigh_construct(struct net_device *dev,
struct neighbour *n);
void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
struct neighbour *n);
int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count);
void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
#endif
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
* Copyright (c) 2016 Mellanox Technologies. All rights reserved.
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/kernel.h>
#include <linux/bitops.h>
#include "spectrum.h"
#define MLXSW_SP_KVDL_SINGLE_BASE 0
#define MLXSW_SP_KVDL_SINGLE_SIZE 16384
#define MLXSW_SP_KVDL_CHUNKS_BASE \
(MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE)
#define MLXSW_SP_KVDL_CHUNKS_SIZE \
(MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_CHUNKS_BASE)
#define MLXSW_SP_CHUNK_MAX 32
int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count)
{
int entry_index;
int size;
int type_base;
int type_size;
int type_entries;
if (entry_count == 0 || entry_count > MLXSW_SP_CHUNK_MAX) {
return -EINVAL;
} else if (entry_count == 1) {
type_base = MLXSW_SP_KVDL_SINGLE_BASE;
type_size = MLXSW_SP_KVDL_SINGLE_SIZE;
type_entries = 1;
} else {
type_base = MLXSW_SP_KVDL_CHUNKS_BASE;
type_size = MLXSW_SP_KVDL_CHUNKS_SIZE;
type_entries = MLXSW_SP_CHUNK_MAX;
}
entry_index = type_base;
size = type_base + type_size;
for_each_clear_bit_from(entry_index, mlxsw_sp->kvdl.usage, size) {
int i;
for (i = 0; i < type_entries; i++)
set_bit(entry_index + i, mlxsw_sp->kvdl.usage);
return entry_index;
}
return -ENOBUFS;
}
void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index)
{
int type_entries;
int i;
if (entry_index < MLXSW_SP_KVDL_CHUNKS_BASE)
type_entries = 1;
else
type_entries = MLXSW_SP_CHUNK_MAX;
for (i = 0; i < type_entries; i++)
clear_bit(entry_index + i, mlxsw_sp->kvdl.usage);
}
......@@ -3,6 +3,7 @@
* Copyright (c) 2016 Mellanox Technologies. All rights reserved.
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
* Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......@@ -38,6 +39,10 @@
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include "spectrum.h"
#include "core.h"
......@@ -112,6 +117,8 @@ enum mlxsw_sp_fib_entry_type {
MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
};
struct mlxsw_sp_nexthop_group;
struct mlxsw_sp_fib_entry {
struct rhash_head ht_node;
struct mlxsw_sp_fib_key key;
......@@ -119,6 +126,8 @@ struct mlxsw_sp_fib_entry {
u8 added:1;
u16 rif; /* used for action local */
struct mlxsw_sp_vr *vr;
struct list_head nexthop_group_node;
struct mlxsw_sp_nexthop_group *nh_group;
};
struct mlxsw_sp_fib {
......@@ -544,6 +553,949 @@ static void mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
}
}
struct mlxsw_sp_neigh_key {
unsigned char addr[sizeof(struct in6_addr)];
struct net_device *dev;
};
struct mlxsw_sp_neigh_entry {
struct rhash_head ht_node;
struct mlxsw_sp_neigh_key key;
u16 rif;
struct neighbour *n;
bool offloaded;
struct delayed_work dw;
struct mlxsw_sp_port *mlxsw_sp_port;
unsigned char ha[ETH_ALEN];
struct list_head nexthop_list; /* list of nexthops using
* this neigh entry
*/
struct list_head nexthop_neighs_list_node;
};
static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
.key_len = sizeof(struct mlxsw_sp_neigh_key),
};
static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_neigh_entry *neigh_entry)
{
return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
&neigh_entry->ht_node,
mlxsw_sp_neigh_ht_params);
}
static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_neigh_entry *neigh_entry)
{
rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
&neigh_entry->ht_node,
mlxsw_sp_neigh_ht_params);
}
static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work);
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len,
struct net_device *dev, u16 rif,
struct neighbour *n)
{
struct mlxsw_sp_neigh_entry *neigh_entry;
neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC);
if (!neigh_entry)
return NULL;
memcpy(neigh_entry->key.addr, addr, addr_len);
neigh_entry->key.dev = dev;
neigh_entry->rif = rif;
neigh_entry->n = n;
INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw);
INIT_LIST_HEAD(&neigh_entry->nexthop_list);
return neigh_entry;
}
static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry)
{
kfree(neigh_entry);
}
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, const void *addr,
size_t addr_len, struct net_device *dev)
{
struct mlxsw_sp_neigh_key key = {{ 0 } };
memcpy(key.addr, addr, addr_len);
key.dev = dev;
return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
&key, mlxsw_sp_neigh_ht_params);
}
int mlxsw_sp_router_neigh_construct(struct net_device *dev,
struct neighbour *n)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
struct mlxsw_sp_neigh_entry *neigh_entry;
struct mlxsw_sp_rif *r;
u32 dip;
int err;
if (n->tbl != &arp_tbl)
return 0;
dip = ntohl(*((__be32 *) n->primary_key));
neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip),
n->dev);
if (neigh_entry) {
WARN_ON(neigh_entry->n != n);
return 0;
}
r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
if (WARN_ON(!r))
return -EINVAL;
neigh_entry = mlxsw_sp_neigh_entry_create(&dip, sizeof(dip), n->dev,
r->rif, n);
if (!neigh_entry)
return -ENOMEM;
err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
if (err)
goto err_neigh_entry_insert;
return 0;
err_neigh_entry_insert:
mlxsw_sp_neigh_entry_destroy(neigh_entry);
return err;
}
void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
struct neighbour *n)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
struct mlxsw_sp_neigh_entry *neigh_entry;
u32 dip;
if (n->tbl != &arp_tbl)
return;
dip = ntohl(*((__be32 *) n->primary_key));
neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip),
n->dev);
if (!neigh_entry)
return;
mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
mlxsw_sp_neigh_entry_destroy(neigh_entry);
}
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
}
static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
char *rauhtd_pl,
int ent_index)
{
struct net_device *dev;
struct neighbour *n;
__be32 dipn;
u32 dip;
u16 rif;
mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
if (!mlxsw_sp->rifs[rif]) {
dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
return;
}
dipn = htonl(dip);
dev = mlxsw_sp->rifs[rif]->dev;
n = neigh_lookup(&arp_tbl, &dipn, dev);
if (!n) {
netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
&dip);
return;
}
netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
neigh_event_send(n, NULL);
neigh_release(n);
}
static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
char *rauhtd_pl,
int rec_index)
{
u8 num_entries;
int i;
num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
rec_index);
/* Hardware starts counting at 0, so add 1. */
num_entries++;
/* Each record consists of several neighbour entries. */
for (i = 0; i < num_entries; i++) {
int ent_index;
ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
ent_index);
}
}
static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
char *rauhtd_pl, int rec_index)
{
switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
case MLXSW_REG_RAUHTD_TYPE_IPV4:
mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
rec_index);
break;
case MLXSW_REG_RAUHTD_TYPE_IPV6:
WARN_ON_ONCE(1);
break;
}
}
static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
{
char *rauhtd_pl;
u8 num_rec;
int i, err;
rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
if (!rauhtd_pl)
return -ENOMEM;
/* Make sure the neighbour's netdev isn't removed in the
* process.
*/
rtnl_lock();
do {
mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
rauhtd_pl);
if (err) {
dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n");
break;
}
num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
for (i = 0; i < num_rec; i++)
mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
i);
} while (num_rec);
rtnl_unlock();
kfree(rauhtd_pl);
return err;
}
static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
struct mlxsw_sp_neigh_entry *neigh_entry;
/* Take RTNL mutex here to prevent lists from changes */
rtnl_lock();
list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
nexthop_neighs_list_node) {
/* If this neigh have nexthops, make the kernel think this neigh
* is active regardless of the traffic.
*/
if (!list_empty(&neigh_entry->nexthop_list))
neigh_event_send(neigh_entry->n, NULL);
}
rtnl_unlock();
}
static void
mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
unsigned long interval = mlxsw_sp->router.neighs_update.interval;
mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
msecs_to_jiffies(interval));
}
static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
{
struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
router.neighs_update.dw.work);
int err;
err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
if (err)
dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
mlxsw_sp_router_neighs_update_nh(mlxsw_sp);
mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
}
static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
struct mlxsw_sp_neigh_entry *neigh_entry;
struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
router.nexthop_probe_dw.work);
/* Iterate over nexthop neighbours, find those who are unresolved and
* send arp on them. This solves the chicken-egg problem when
* the nexthop wouldn't get offloaded until the neighbor is resolved
* but it wouldn't get resolved ever in case traffic is flowing in HW
* using different nexthop.
*
* Take RTNL mutex here to prevent lists from changes.
*/
rtnl_lock();
list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
nexthop_neighs_list_node) {
if (!(neigh_entry->n->nud_state & NUD_VALID) &&
!list_empty(&neigh_entry->nexthop_list))
neigh_event_send(neigh_entry->n, NULL);
}
rtnl_unlock();
mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}
static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_neigh_entry *neigh_entry,
bool removing);
static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
{
struct mlxsw_sp_neigh_entry *neigh_entry =
container_of(work, struct mlxsw_sp_neigh_entry, dw.work);
struct neighbour *n = neigh_entry->n;
struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port;
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
char rauht_pl[MLXSW_REG_RAUHT_LEN];
struct net_device *dev;
bool entry_connected;
u8 nud_state;
bool updating;
bool removing;
bool adding;
u32 dip;
int err;
read_lock_bh(&n->lock);
dip = ntohl(*((__be32 *) n->primary_key));
memcpy(neigh_entry->ha, n->ha, sizeof(neigh_entry->ha));
nud_state = n->nud_state;
dev = n->dev;
read_unlock_bh(&n->lock);
entry_connected = nud_state & NUD_VALID;
adding = (!neigh_entry->offloaded) && entry_connected;
updating = neigh_entry->offloaded && entry_connected;
removing = neigh_entry->offloaded && !entry_connected;
if (adding || updating) {
mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_ADD,
neigh_entry->rif,
neigh_entry->ha, dip);
err = mlxsw_reg_write(mlxsw_sp->core,
MLXSW_REG(rauht), rauht_pl);
if (err) {
netdev_err(dev, "Could not add neigh %pI4h\n", &dip);
neigh_entry->offloaded = false;
} else {
neigh_entry->offloaded = true;
}
mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false);
} else if (removing) {
mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE,
neigh_entry->rif,
neigh_entry->ha, dip);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht),
rauht_pl);
if (err) {
netdev_err(dev, "Could not delete neigh %pI4h\n", &dip);
neigh_entry->offloaded = true;
} else {
neigh_entry->offloaded = false;
}
mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true);
}
neigh_release(n);
mlxsw_sp_port_dev_put(mlxsw_sp_port);
}
static int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct mlxsw_sp_neigh_entry *neigh_entry;
struct mlxsw_sp_port *mlxsw_sp_port;
struct mlxsw_sp *mlxsw_sp;
unsigned long interval;
struct net_device *dev;
struct neigh_parms *p;
struct neighbour *n;
u32 dip;
switch (event) {
case NETEVENT_DELAY_PROBE_TIME_UPDATE:
p = ptr;
/* We don't care about changes in the default table. */
if (!p->dev || p->tbl != &arp_tbl)
return NOTIFY_DONE;
/* We are in atomic context and can't take RTNL mutex,
* so use RCU variant to walk the device chain.
*/
mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
if (!mlxsw_sp_port)
return NOTIFY_DONE;
mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
mlxsw_sp->router.neighs_update.interval = interval;
mlxsw_sp_port_dev_put(mlxsw_sp_port);
break;
case NETEVENT_NEIGH_UPDATE:
n = ptr;
dev = n->dev;
if (n->tbl != &arp_tbl)
return NOTIFY_DONE;
mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(dev);
if (!mlxsw_sp_port)
return NOTIFY_DONE;
mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
dip = ntohl(*((__be32 *) n->primary_key));
neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp,
&dip,
sizeof(__be32),
dev);
if (WARN_ON(!neigh_entry) || WARN_ON(neigh_entry->n != n)) {
mlxsw_sp_port_dev_put(mlxsw_sp_port);
return NOTIFY_DONE;
}
neigh_entry->mlxsw_sp_port = mlxsw_sp_port;
/* Take a reference to ensure the neighbour won't be
* destructed until we drop the reference in delayed
* work.
*/
neigh_clone(n);
if (!mlxsw_core_schedule_dw(&neigh_entry->dw, 0)) {
neigh_release(n);
mlxsw_sp_port_dev_put(mlxsw_sp_port);
}
break;
}
return NOTIFY_DONE;
}
static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
.notifier_call = mlxsw_sp_router_netevent_event,
};
static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
int err;
err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
&mlxsw_sp_neigh_ht_params);
if (err)
return err;
/* Initialize the polling interval according to the default
* table.
*/
mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
err = register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
if (err)
goto err_register_netevent_notifier;
/* Create the delayed works for the activity_update */
INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
mlxsw_sp_router_neighs_update_work);
INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
mlxsw_sp_router_probe_unresolved_nexthops);
mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
return 0;
err_register_netevent_notifier:
rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
return err;
}
static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
}
struct mlxsw_sp_nexthop {
struct list_head neigh_list_node; /* member of neigh entry list */
struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
* this belongs to
*/
u8 should_offload:1, /* set indicates this neigh is connected and
* should be put to KVD linear area of this group.
*/
offloaded:1, /* set in case the neigh is actually put into
* KVD linear area of this group.
*/
update:1; /* set indicates that MAC of this neigh should be
* updated in HW
*/
struct mlxsw_sp_neigh_entry *neigh_entry;
};
struct mlxsw_sp_nexthop_group {
struct list_head list; /* node in mlxsw->router.nexthop_group_list */
struct list_head fib_list; /* list of fib entries that use this group */
u8 adj_index_valid:1;
u32 adj_index;
u16 ecmp_size;
u16 count;
struct mlxsw_sp_nexthop nexthops[0];
};
static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_vr *vr,
u32 adj_index, u16 ecmp_size,
u32 new_adj_index,
u16 new_ecmp_size)
{
char raleu_pl[MLXSW_REG_RALEU_LEN];
mlxsw_reg_raleu_pack(raleu_pl, vr->proto, vr->id,
adj_index, ecmp_size,
new_adj_index, new_ecmp_size);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}
static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp,
u32 old_adj_index, u16 old_ecmp_size)
{
struct mlxsw_sp_fib_entry *fib_entry;
struct mlxsw_sp_vr *vr = NULL;
int err;
list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
if (vr == fib_entry->vr)
continue;
vr = fib_entry->vr;
err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
old_adj_index,
old_ecmp_size,
nh_grp->adj_index,
nh_grp->ecmp_size);
if (err)
return err;
}
return 0;
}
static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_nexthop *nh)
{
struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
char ratr_pl[MLXSW_REG_RATR_LEN];
mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
true, adj_index, neigh_entry->rif);
mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}
static int
mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
{
u32 adj_index = nh_grp->adj_index; /* base */
struct mlxsw_sp_nexthop *nh;
int i;
int err;
for (i = 0; i < nh_grp->count; i++) {
nh = &nh_grp->nexthops[i];
if (!nh->should_offload) {
nh->offloaded = 0;
continue;
}
if (nh->update) {
err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
adj_index, nh);
if (err)
return err;
nh->update = 0;
nh->offloaded = 1;
}
adj_index++;
}
return 0;
}
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry);
static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
{
struct mlxsw_sp_fib_entry *fib_entry;
int err;
list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
if (err)
return err;
}
return 0;
}
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
{
struct mlxsw_sp_nexthop *nh;
bool offload_change = false;
u32 adj_index;
u16 ecmp_size = 0;
bool old_adj_index_valid;
u32 old_adj_index;
u16 old_ecmp_size;
int ret;
int i;
int err;
for (i = 0; i < nh_grp->count; i++) {
nh = &nh_grp->nexthops[i];
if (nh->should_offload ^ nh->offloaded) {
offload_change = true;
if (nh->should_offload)
nh->update = 1;
}
if (nh->should_offload)
ecmp_size++;
}
if (!offload_change) {
/* Nothing was added or removed, so no need to reallocate. Just
* update MAC on existing adjacency indexes.
*/
err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
if (err) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
goto set_trap;
}
return;
}
if (!ecmp_size)
/* No neigh of this group is connected so we just set
* the trap and let everthing flow through kernel.
*/
goto set_trap;
ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
if (ret < 0) {
/* We ran out of KVD linear space, just set the
* trap and let everything flow through kernel.
*/
dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
goto set_trap;
}
adj_index = ret;
old_adj_index_valid = nh_grp->adj_index_valid;
old_adj_index = nh_grp->adj_index;
old_ecmp_size = nh_grp->ecmp_size;
nh_grp->adj_index_valid = 1;
nh_grp->adj_index = adj_index;
nh_grp->ecmp_size = ecmp_size;
err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp);
if (err) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
goto set_trap;
}
if (!old_adj_index_valid) {
/* The trap was set for fib entries, so we have to call
* fib entry update to unset it and use adjacency index.
*/
err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
if (err) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
goto set_trap;
}
return;
}
err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
old_adj_index, old_ecmp_size);
mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
if (err) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
goto set_trap;
}
return;
set_trap:
old_adj_index_valid = nh_grp->adj_index_valid;
nh_grp->adj_index_valid = 0;
for (i = 0; i < nh_grp->count; i++) {
nh = &nh_grp->nexthops[i];
nh->offloaded = 0;
}
err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
if (err)
dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
if (old_adj_index_valid)
mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}
static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
bool removing)
{
if (!removing && !nh->should_offload)
nh->should_offload = 1;
else if (removing && nh->offloaded)
nh->should_offload = 0;
nh->update = 1;
}
static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_neigh_entry *neigh_entry,
bool removing)
{
struct mlxsw_sp_nexthop *nh;
/* Take RTNL mutex here to prevent lists from changes */
rtnl_lock();
list_for_each_entry(nh, &neigh_entry->nexthop_list,
neigh_list_node) {
__mlxsw_sp_nexthop_neigh_update(nh, removing);
mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
}
rtnl_unlock();
}
static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp,
struct mlxsw_sp_nexthop *nh,
struct fib_nh *fib_nh)
{
struct mlxsw_sp_neigh_entry *neigh_entry;
u32 gwip = ntohl(fib_nh->nh_gw);
struct net_device *dev = fib_nh->nh_dev;
struct neighbour *n;
u8 nud_state;
neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
sizeof(gwip), dev);
if (!neigh_entry) {
__be32 gwipn = htonl(gwip);
n = neigh_create(&arp_tbl, &gwipn, dev);
if (IS_ERR(n))
return PTR_ERR(n);
neigh_event_send(n, NULL);
neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
sizeof(gwip), dev);
if (!neigh_entry) {
neigh_release(n);
return -EINVAL;
}
} else {
/* Take a reference of neigh here ensuring that neigh would
* not be detructed before the nexthop entry is finished.
* The second branch takes the reference in neith_create()
*/
n = neigh_entry->n;
neigh_clone(n);
}
/* If that is the first nexthop connected to that neigh, add to
* nexthop_neighs_list
*/
if (list_empty(&neigh_entry->nexthop_list))
list_add_tail(&neigh_entry->nexthop_neighs_list_node,
&mlxsw_sp->router.nexthop_neighs_list);
nh->nh_grp = nh_grp;
nh->neigh_entry = neigh_entry;
list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
read_lock_bh(&n->lock);
nud_state = n->nud_state;
read_unlock_bh(&n->lock);
__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID));
return 0;
}
static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh)
{
struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
list_del(&nh->neigh_list_node);
/* If that is the last nexthop connected to that neigh, remove from
* nexthop_neighs_list
*/
if (list_empty(&nh->neigh_entry->nexthop_list))
list_del(&nh->neigh_entry->nexthop_neighs_list_node);
neigh_release(neigh_entry->n);
}
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
struct mlxsw_sp_nexthop_group *nh_grp;
struct mlxsw_sp_nexthop *nh;
struct fib_nh *fib_nh;
size_t alloc_size;
int i;
int err;
alloc_size = sizeof(*nh_grp) +
fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
nh_grp = kzalloc(alloc_size, GFP_KERNEL);
if (!nh_grp)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&nh_grp->fib_list);
nh_grp->count = fi->fib_nhs;
for (i = 0; i < nh_grp->count; i++) {
nh = &nh_grp->nexthops[i];
fib_nh = &fi->fib_nh[i];
err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
if (err)
goto err_nexthop_init;
}
list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list);
mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
return nh_grp;
err_nexthop_init:
for (i--; i >= 0; i--)
mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
kfree(nh_grp);
return ERR_PTR(err);
}
static void
mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
{
struct mlxsw_sp_nexthop *nh;
int i;
list_del(&nh_grp->list);
for (i = 0; i < nh_grp->count; i++) {
nh = &nh_grp->nexthops[i];
mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
}
kfree(nh_grp);
}
static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh,
struct fib_info *fi)
{
int i;
for (i = 0; i < fi->fib_nhs; i++) {
struct fib_nh *fib_nh = &fi->fib_nh[i];
u32 gwip = ntohl(fib_nh->nh_gw);
if (memcmp(nh->neigh_entry->key.addr,
&gwip, sizeof(u32)) == 0 &&
nh->neigh_entry->key.dev == fib_nh->nh_dev)
return true;
}
return false;
}
static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp,
struct fib_info *fi)
{
int i;
if (nh_grp->count != fi->fib_nhs)
return false;
for (i = 0; i < nh_grp->count; i++) {
struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
if (!mlxsw_sp_nexthop_match(nh, fi))
return false;
}
return true;
}
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
struct mlxsw_sp_nexthop_group *nh_grp;
list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list,
list) {
if (mlxsw_sp_nexthop_group_match(nh_grp, fi))
return nh_grp;
}
return NULL;
}
static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry,
struct fib_info *fi)
{
struct mlxsw_sp_nexthop_group *nh_grp;
nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi);
if (!nh_grp) {
nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
if (IS_ERR(nh_grp))
return PTR_ERR(nh_grp);
}
list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
fib_entry->nh_group = nh_grp;
return 0;
}
static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
{
struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
list_del(&fib_entry->nexthop_group_node);
if (!list_empty(&nh_grp->fib_list))
return;
mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
}
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
char rgcr_pl[MLXSW_REG_RGCR_LEN];
......@@ -565,19 +1517,54 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
int err;
INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list);
err = __mlxsw_sp_router_init(mlxsw_sp);
if (err)
return err;
mlxsw_sp_lpm_init(mlxsw_sp);
mlxsw_sp_vrs_init(mlxsw_sp);
return 0;
return mlxsw_sp_neigh_init(mlxsw_sp);
}
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
mlxsw_sp_neigh_fini(mlxsw_sp);
__mlxsw_sp_router_fini(mlxsw_sp);
}
static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry,
enum mlxsw_reg_ralue_op op)
{
char ralue_pl[MLXSW_REG_RALUE_LEN];
u32 *p_dip = (u32 *) fib_entry->key.addr;
struct mlxsw_sp_vr *vr = fib_entry->vr;
enum mlxsw_reg_ralue_trap_action trap_action;
u16 trap_id = 0;
u32 adjacency_index = 0;
u16 ecmp_size = 0;
/* In case the nexthop group adjacency index is valid, use it
* with provided ECMP size. Otherwise, setup trap and pass
* traffic to kernel.
*/
if (fib_entry->nh_group->adj_index_valid) {
trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
adjacency_index = fib_entry->nh_group->adj_index;
ecmp_size = fib_entry->nh_group->ecmp_size;
} else {
trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
}
mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id,
fib_entry->key.prefix_len, *p_dip);
mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
adjacency_index, ecmp_size);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry,
enum mlxsw_reg_ralue_op op)
......@@ -614,7 +1601,7 @@ static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
{
switch (fib_entry->type) {
case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
return -EINVAL;
return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
......@@ -694,7 +1681,17 @@ mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp,
fib_entry->rif = r->rif;
return 0;
}
return -EINVAL;
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
return mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi);
}
static void
mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
{
if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_REMOTE)
return;
mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
}
static int
......@@ -738,6 +1735,7 @@ mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port,
return 0;
err_alloc_info:
mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
err_fib4_entry_init:
mlxsw_sp_fib_entry_destroy(fib_entry);
err_fib_entry_create:
......@@ -772,6 +1770,7 @@ mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port,
err_fib_entry_add:
mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
err_fib_entry_insert:
mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
mlxsw_sp_fib_entry_destroy(fib_entry);
mlxsw_sp_vr_put(mlxsw_sp, vr);
return err;
......@@ -808,6 +1807,7 @@ int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port,
}
mlxsw_sp_fib_entry_del(mlxsw_sp_port->mlxsw_sp, fib_entry);
mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
mlxsw_sp_fib_entry_destroy(fib_entry);
mlxsw_sp_vr_put(mlxsw_sp, vr);
return 0;
......
......@@ -1996,7 +1996,8 @@ static int rocker_port_change_proto_down(struct net_device *dev,
return 0;
}
static void rocker_port_neigh_destroy(struct neighbour *n)
static void rocker_port_neigh_destroy(struct net_device *dev,
struct neighbour *n)
{
struct rocker_port *rocker_port = netdev_priv(n->dev);
int err;
......
......@@ -2002,6 +2002,8 @@ static const struct net_device_ops team_netdev_ops = {
.ndo_add_slave = team_add_slave,
.ndo_del_slave = team_del_slave,
.ndo_fix_features = team_fix_features,
.ndo_neigh_construct = netdev_default_l2upper_neigh_construct,
.ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy,
.ndo_change_carrier = team_change_carrier,
.ndo_bridge_setlink = switchdev_port_bridge_setlink,
.ndo_bridge_getlink = switchdev_port_bridge_getlink,
......
......@@ -1209,8 +1209,10 @@ struct net_device_ops {
netdev_features_t features);
int (*ndo_set_features)(struct net_device *dev,
netdev_features_t features);
int (*ndo_neigh_construct)(struct neighbour *n);
void (*ndo_neigh_destroy)(struct neighbour *n);
int (*ndo_neigh_construct)(struct net_device *dev,
struct neighbour *n);
void (*ndo_neigh_destroy)(struct net_device *dev,
struct neighbour *n);
int (*ndo_fdb_add)(struct ndmsg *ndm,
struct nlattr *tb[],
......@@ -3843,6 +3845,10 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
struct net_device *lower_dev);
void netdev_lower_state_changed(struct net_device *lower_dev,
void *lower_state_info);
int netdev_default_l2upper_neigh_construct(struct net_device *dev,
struct neighbour *n);
void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
struct neighbour *n);
/* RSS keys are 40 or 52 bytes long */
#define NETDEV_RSS_KEY_LEN 52
......
......@@ -24,6 +24,7 @@ struct netevent_redirect {
enum netevent_notif_type {
NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */
NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */
NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
};
int register_netevent_notifier(struct notifier_block *nb);
......
......@@ -790,6 +790,8 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup,
#endif
.ndo_fix_features = vlan_dev_fix_features,
.ndo_neigh_construct = netdev_default_l2upper_neigh_construct,
.ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy,
.ndo_fdb_add = switchdev_port_fdb_add,
.ndo_fdb_del = switchdev_port_fdb_del,
.ndo_fdb_dump = switchdev_port_fdb_dump,
......
......@@ -286,7 +286,7 @@ static const struct neigh_ops clip_neigh_ops = {
.connected_output = neigh_direct_output,
};
static int clip_constructor(struct neighbour *neigh)
static int clip_constructor(struct net_device *dev, struct neighbour *neigh)
{
struct atmarp_entry *entry = neighbour_priv(neigh);
......
......@@ -349,6 +349,8 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_add_slave = br_add_slave,
.ndo_del_slave = br_del_slave,
.ndo_fix_features = br_fix_features,
.ndo_neigh_construct = netdev_default_l2upper_neigh_construct,
.ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy,
.ndo_fdb_add = br_fdb_add,
.ndo_fdb_del = br_fdb_delete,
.ndo_fdb_dump = br_fdb_dump,
......
......@@ -6087,6 +6087,50 @@ void netdev_lower_state_changed(struct net_device *lower_dev,
}
EXPORT_SYMBOL(netdev_lower_state_changed);
int netdev_default_l2upper_neigh_construct(struct net_device *dev,
struct neighbour *n)
{
struct net_device *lower_dev, *stop_dev;
struct list_head *iter;
int err;
netdev_for_each_lower_dev(dev, lower_dev, iter) {
if (!lower_dev->netdev_ops->ndo_neigh_construct)
continue;
err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n);
if (err) {
stop_dev = lower_dev;
goto rollback;
}
}
return 0;
rollback:
netdev_for_each_lower_dev(dev, lower_dev, iter) {
if (lower_dev == stop_dev)
break;
if (!lower_dev->netdev_ops->ndo_neigh_destroy)
continue;
lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
}
return err;
}
EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct);
void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
struct neighbour *n)
{
struct net_device *lower_dev;
struct list_head *iter;
netdev_for_each_lower_dev(dev, lower_dev, iter) {
if (!lower_dev->netdev_ops->ndo_neigh_destroy)
continue;
lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
}
}
EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy);
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
......
......@@ -473,7 +473,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
}
if (dev->netdev_ops->ndo_neigh_construct) {
error = dev->netdev_ops->ndo_neigh_construct(n);
error = dev->netdev_ops->ndo_neigh_construct(dev, n);
if (error < 0) {
rc = ERR_PTR(error);
goto out_neigh_release;
......@@ -701,7 +701,7 @@ void neigh_destroy(struct neighbour *neigh)
neigh->arp_queue_len_bytes = 0;
if (dev->netdev_ops->ndo_neigh_destroy)
dev->netdev_ops->ndo_neigh_destroy(neigh);
dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
dev_put(dev);
neigh_parms_put(neigh->parms);
......@@ -2047,6 +2047,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
case NDTPA_DELAY_PROBE_TIME:
NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
nla_get_msecs(tbp[i]));
call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
break;
case NDTPA_RETRANS_TIME:
NEIGH_VAR_SET(p, RETRANS_TIME,
......@@ -2930,6 +2931,7 @@ static void neigh_proc_update(struct ctl_table *ctl, int write)
return;
set_bit(index, p->data_state);
call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
if (!dev) /* NULL dev means this is default value */
neigh_copy_dflt_parms(net, p, index);
}
......
......@@ -81,7 +81,7 @@ static int lowpan_stop(struct net_device *dev)
return 0;
}
static int lowpan_neigh_construct(struct neighbour *n)
static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n)
{
struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment