Commit abef8495 authored by Jakub Kicinski

Merge branch 'mlxsw-handle-mtu-values'

Petr Machata says:

====================
mlxsw: Handle MTU values

Amit Cohen writes:

The driver uses two values for maximum MTU, but neither is accurate.
In addition, the value which is configured to hardware is not calculated
correctly. Handle these issues and expose accurate values for minimum
and maximum MTU per netdevice.

Add test cases to check that the exposed values are really supported.

Patch set overview:
Patches #1-#3 set the driver to use accurate values for MTU
Patch #4 aligns the driver to always use the same value for maximum MTU
Patch #5 adds a test
====================

Link: https://lore.kernel.org/r/cover.1718275854.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents afc5625e 4be3dcc9
...@@ -6,7 +6,8 @@ ...@@ -6,7 +6,8 @@
#include <linux/types.h> #include <linux/types.h>
#define MLXSW_PORT_MAX_MTU 10000 #define MLXSW_PORT_MAX_MTU (10 * 1024)
#define MLXSW_PORT_ETH_FRAME_HDR (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
#define MLXSW_PORT_DEFAULT_VID 1 #define MLXSW_PORT_DEFAULT_VID 1
......
...@@ -405,29 +405,12 @@ static int mlxsw_sp_port_dev_addr_init(struct mlxsw_sp_port *mlxsw_sp_port) ...@@ -405,29 +405,12 @@ static int mlxsw_sp_port_dev_addr_init(struct mlxsw_sp_port *mlxsw_sp_port)
mlxsw_sp_port->dev->dev_addr); mlxsw_sp_port->dev->dev_addr);
} }
static int mlxsw_sp_port_max_mtu_get(struct mlxsw_sp_port *mlxsw_sp_port, int *p_max_mtu)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
char pmtu_pl[MLXSW_REG_PMTU_LEN];
int err;
mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, 0);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl);
if (err)
return err;
*p_max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl);
return 0;
}
static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu)
{ {
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
char pmtu_pl[MLXSW_REG_PMTU_LEN]; char pmtu_pl[MLXSW_REG_PMTU_LEN];
mtu += MLXSW_TXHDR_LEN + ETH_HLEN; mtu += MLXSW_PORT_ETH_FRAME_HDR;
if (mtu > mlxsw_sp_port->max_mtu)
return -EINVAL;
mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, mtu); mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, mtu);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl);
...@@ -1697,8 +1680,8 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port, ...@@ -1697,8 +1680,8 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port,
NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC; NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC;
dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK; dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK;
dev->min_mtu = 0; dev->min_mtu = ETH_MIN_MTU;
dev->max_mtu = ETH_MAX_MTU; dev->max_mtu = MLXSW_PORT_MAX_MTU - MLXSW_PORT_ETH_FRAME_HDR;
/* Each packet needs to have a Tx header (metadata) on top all other /* Each packet needs to have a Tx header (metadata) on top all other
* headers. * headers.
...@@ -1727,13 +1710,6 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port, ...@@ -1727,13 +1710,6 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port,
goto err_max_speed_get; goto err_max_speed_get;
} }
err = mlxsw_sp_port_max_mtu_get(mlxsw_sp_port, &mlxsw_sp_port->max_mtu);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to get maximum MTU\n",
mlxsw_sp_port->local_port);
goto err_port_max_mtu_get;
}
err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, ETH_DATA_LEN); err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, ETH_DATA_LEN);
if (err) { if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set MTU\n", dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set MTU\n",
...@@ -1877,7 +1853,6 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port, ...@@ -1877,7 +1853,6 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port,
err_port_buffers_init: err_port_buffers_init:
err_port_admin_status_set: err_port_admin_status_set:
err_port_mtu_set: err_port_mtu_set:
err_port_max_mtu_get:
err_max_speed_get: err_max_speed_get:
err_port_speed_by_width_set: err_port_speed_by_width_set:
err_port_system_port_mapping_set: err_port_system_port_mapping_set:
......
...@@ -359,7 +359,6 @@ struct mlxsw_sp_port { ...@@ -359,7 +359,6 @@ struct mlxsw_sp_port {
u16 egr_types; u16 egr_types;
struct mlxsw_sp_ptp_port_stats stats; struct mlxsw_sp_ptp_port_stats stats;
} ptp; } ptp;
int max_mtu;
u32 max_speed; u32 max_speed;
struct mlxsw_sp_hdroom *hdroom; struct mlxsw_sp_hdroom *hdroom;
u64 module_overheat_initial_val; u64 module_overheat_initial_val;
......
...@@ -399,11 +399,13 @@ void mlxsw_sp_hdroom_bufs_reset_sizes(struct mlxsw_sp_port *mlxsw_sp_port, ...@@ -399,11 +399,13 @@ void mlxsw_sp_hdroom_bufs_reset_sizes(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_hdroom *hdroom) struct mlxsw_sp_hdroom *hdroom)
{ {
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
unsigned int max_mtu = mlxsw_sp_port->dev->max_mtu;
u16 reserve_cells; u16 reserve_cells;
int i; int i;
max_mtu += MLXSW_PORT_ETH_FRAME_HDR;
/* Internal buffer. */ /* Internal buffer. */
reserve_cells = mlxsw_sp_hdroom_int_buf_size_get(mlxsw_sp, mlxsw_sp_port->max_mtu, reserve_cells = mlxsw_sp_hdroom_int_buf_size_get(mlxsw_sp, max_mtu,
mlxsw_sp_port->max_speed); mlxsw_sp_port->max_speed);
reserve_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, reserve_cells); reserve_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, reserve_cells);
hdroom->int_buf.reserve_cells = reserve_cells; hdroom->int_buf.reserve_cells = reserve_cells;
...@@ -613,7 +615,9 @@ static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) ...@@ -613,7 +615,9 @@ static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port)
mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom);
/* Buffer 9 is used for control traffic. */ /* Buffer 9 is used for control traffic. */
size9 = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, mlxsw_sp_port->max_mtu); size9 = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port,
mlxsw_sp_port->dev->max_mtu +
MLXSW_PORT_ETH_FRAME_HDR);
hdroom.bufs.buf[9].size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size9); hdroom.bufs.buf[9].size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size9);
return __mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom, true); return __mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom, true);
......
...@@ -39,6 +39,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ ...@@ -39,6 +39,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
ipip_hier_gre.sh \ ipip_hier_gre.sh \
lib_sh_test.sh \ lib_sh_test.sh \
local_termination.sh \ local_termination.sh \
min_max_mtu.sh \
mirror_gre_bound.sh \ mirror_gre_bound.sh \
mirror_gre_bridge_1d.sh \ mirror_gre_bridge_1d.sh \
mirror_gre_bridge_1d_vlan.sh \ mirror_gre_bridge_1d_vlan.sh \
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# +--------------------+
# | H1 |
# | |
# | $h1.10 + |
# | 192.0.2.2/24 | |
# | 2001:db8:1::2/64 | |
# | | |
# | $h1 + |
# | | |
# +------------------|-+
# |
# +------------------|-+
# | SW | |
# | $swp1 + |
# | | |
# | $swp1.10 + |
# | 192.0.2.1/24 |
# | 2001:db8:1::1/64 |
# | |
# +--------------------+
# Names of the test functions defined in this file. NOTE(review): presumably
# consumed by tests_run from lib.sh, which is invoked at the end of the script;
# confirm there.
ALL_TESTS="
ping_ipv4
ping_ipv6
max_mtu_config_test
max_mtu_traffic_test
min_mtu_config_test
min_mtu_traffic_test
"
# Number of interfaces used by this test; the loops over NETIFS[p1..pN] below
# iterate up to this value.
NUM_NETIFS=2
# Helpers used but not defined in this file (ping_test, vlan_create, mtu_set,
# log_test, ...) are expected to come from here.
source lib.sh
# Set up the H1 side of the topology: initialize $h1 and create VLAN 10 on top
# of it with the 192.0.2.2/24 and 2001:db8:1::2/64 addresses ($h1.10 in the
# diagram at the top of the file).
h1_create()
{
	simple_if_init $h1
	vlan_create $h1 10 v$h1 \
		192.0.2.2/24 2001:db8:1::2/64
}
# Undo h1_create: remove VLAN 10 from $h1 first, then tear down $h1 itself.
h1_destroy()
{
	vlan_destroy $h1 10 \
		192.0.2.2/24 2001:db8:1::2/64
	simple_if_fini $h1
}
# Set up the SW side of the topology: bring $swp1 up and create VLAN 10 on top
# of it with the 192.0.2.1/24 and 2001:db8:1::1/64 addresses ($swp1.10 in the
# diagram at the top of the file). The empty third argument is passed through
# to vlan_create unchanged.
switch_create()
{
	ip link set dev $swp1 up
	vlan_create $swp1 10 "" \
		192.0.2.1/24 2001:db8:1::1/64
}
# Undo switch_create: set $swp1 administratively down, then remove VLAN 10
# from it.
switch_destroy()
{
	ip link set dev $swp1 down
	vlan_destroy $swp1 10
}
# Build the test topology. Binds the global names h1 and swp1 to the first and
# second entries of NETIFS, then creates the host and switch sides.
setup_prepare()
{
# Globals (not "local"): the create/destroy and ping helpers read $h1/$swp1.
h1=${NETIFS[p1]}
swp1=${NETIFS[p2]}
vrf_prepare
h1_create
switch_create
forwarding_enable
}
# Tear down what setup_prepare built, calling the counterpart of each setup
# step in reverse order. Installed as the EXIT trap at the end of the script.
cleanup()
{
pre_cleanup
forwarding_restore
switch_destroy
h1_destroy
vrf_cleanup
}
# Baseline IPv4 connectivity check: ping the switch VLAN address 192.0.2.1
# from $h1.10.
ping_ipv4()
{
	ping_test $h1.10 192.0.2.1
}
# Baseline IPv6 connectivity check: ping the switch VLAN address
# 2001:db8:1::1 from $h1.10.
ping_ipv6()
{
	ping6_test $h1.10 2001:db8:1::1
}
# Print one attribute of a network device as reported by the detailed JSON
# output of "ip link show".
#
# $1 - device name
# $2 - JSON key to extract; callers in this file pass "min_mtu" or "max_mtu"
#
# The value is printed exactly as jq renders it.
min_max_mtu_get_if()
{
	local dev=$1
	local min_max=$2

	ip -d -j link show $dev | jq ".[].$min_max"
}
# Check that all test interfaces report the same minimum or maximum MTU.
#
# $1 - attribute to compare, "min_mtu" or "max_mtu", as understood by
#      min_max_mtu_get_if
#
# Returns 0 when NETIFS[p1] .. NETIFS[p$NUM_NETIFS] all report the same
# numeric value, and 1 otherwise. A value that is not a plain non-negative
# integer (for example jq printing "null" when the attribute is absent, or an
# empty string when the query fails) also yields 1: such a value cannot be
# compared with "-ne", and the resulting test error would otherwise be
# mistaken for "compatible".
ensure_compatible_min_max_mtu()
{
	local min_max=$1; shift
	local mtu
	local current_mtu
	local i

	# Declared separately from the assignment so that "local" does not hide
	# the status of the command substitution.
	mtu=$(min_max_mtu_get_if "${NETIFS[p1]}" "$min_max")
	[[ $mtu =~ ^[0-9]+$ ]] || return 1

	for ((i = 2; i <= NUM_NETIFS; ++i)); do
		current_mtu=$(min_max_mtu_get_if "${NETIFS[p$i]}" "$min_max")
		[[ $current_mtu =~ ^[0-9]+$ ]] || return 1

		if [ "$current_mtu" -ne "$mtu" ]; then
			return 1
		fi
	done

	return 0
}
# Set the MTU of one device and record whether the outcome matched the
# expectation.
#
# $1 - device name
# $2 - MTU to configure
# $3 - optional; non-zero when the configuration is expected to be rejected
#      (defaults to 0, i.e. expected to succeed)
#
# Error output of mtu_set is discarded; its exit status is handed to
# check_err_fail together with the expectation.
mtu_set_if()
{
	local dev=$1
	local mtu=$2
	local should_fail=${3:-0}

	mtu_set $dev $mtu 2>/dev/null
	check_err_fail $should_fail $? "Set MTU $mtu for $dev"
}
# Configure the given MTU on every test interface and on its VLAN 10 upper.
#
# $1 - MTU to configure
#
# For each interface the physical device is configured before the ".10" device
# stacked on it. Each configuration is expected to succeed.
mtu_set_all_if()
{
	local mtu=$1
	local dev
	local i

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}

		mtu_set_if $dev $mtu
		mtu_set_if $dev.10 $mtu
	done
}
# Restore the MTU of every test interface and of its VLAN 10 upper.
#
# The ".10" device is restored before the physical device underneath it, i.e.
# in the opposite order to mtu_set_all_if.
mtu_restore_all_if()
{
	local dev
	local i

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}

		mtu_restore $dev.10
		mtu_restore $dev
	done
}
# Send an IPv4 ping from $h1.10 to 192.0.2.1 whose total packet size equals the
# given MTU, with "-M do" passed to ping, and record whether the outcome
# matched the expectation.
#
# $1 - MTU the packet should fill
# $2 - non-zero when the ping is expected to fail
mtu_test_ping4()
{
	local mtu=$1
	local should_fail=$2
	# Payload size: ping adds a 20 byte IP header and an 8 byte ICMP header.
	local pkt_size=$((mtu - 20 - 8))

	ping_do $h1.10 192.0.2.1 "-s $pkt_size -M do"
	check_err_fail $should_fail $? "Ping, packet size: $pkt_size"
}
# Send an IPv6 ping from $h1.10 to 2001:db8:1::1 whose total packet size equals
# the given MTU, with "-M do" passed to ping, and record whether the outcome
# matched the expectation.
#
# $1 - MTU the packet should fill
# $2 - non-zero when the ping is expected to fail
mtu_test_ping6()
{
	local mtu=$1
	local should_fail=$2
	# Payload size: ping adds a 40 byte IPv6 header and an 8 byte ICMP header.
	local pkt_size=$((mtu - 40 - 8))

	ping6_do $h1.10 2001:db8:1::1 "-s $pkt_size -M do"
	check_err_fail $should_fail $? "Ping6, packet size: $pkt_size"
}
# For every test interface, verify that the advertised maximum MTU can be
# configured and that the next larger value is rejected. The previous MTU is
# restored after each attempt.
max_mtu_config_test()
{
	local max_mtu
	local dev
	local i

	RET=0

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		max_mtu=$(min_max_mtu_get_if $dev "max_mtu")

		# The advertised maximum must be accepted.
		mtu_set_if $dev $max_mtu 0
		mtu_restore $dev

		# One above the advertised maximum must be refused.
		mtu_set_if $dev $((max_mtu + 1)) 1
		mtu_restore $dev
	done

	log_test "Test maximum MTU configuration"
}
# Verify that packets as large as the maximum MTU pass when all interfaces are
# configured with that MTU, and that packets of the same size fail once the
# MTU is lowered by one.
#
# Reported as an expected failure, without sending traffic, when the test
# interfaces do not advertise the same maximum MTU.
max_mtu_traffic_test()
{
	local max_mtu

	RET=0

	ensure_compatible_min_max_mtu "max_mtu" || {
		log_test_xfail "Topology has incompatible maximum MTU values"
		return
	}

	max_mtu=$(min_max_mtu_get_if ${NETIFS[p1]} "max_mtu")

	# MTU equals packet size: both pings are expected to succeed.
	mtu_set_all_if $max_mtu
	mtu_test_ping4 $max_mtu 0
	mtu_test_ping6 $max_mtu 0
	mtu_restore_all_if

	# MTU is one below packet size: both pings are expected to fail.
	mtu_set_all_if $((max_mtu - 1))
	mtu_test_ping4 $max_mtu 1
	mtu_test_ping6 $max_mtu 1
	mtu_restore_all_if

	log_test "Test traffic, packet size is maximum MTU"
}
# For every test interface, verify that the advertised minimum MTU can be
# configured and that the next smaller value is rejected. The previous MTU is
# restored after each attempt.
min_mtu_config_test()
{
	local min_mtu
	local dev
	local i

	RET=0

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		min_mtu=$(min_max_mtu_get_if $dev "min_mtu")

		# The advertised minimum must be accepted.
		mtu_set_if $dev $min_mtu 0
		mtu_restore $dev

		# One below the advertised minimum must be refused.
		mtu_set_if $dev $((min_mtu - 1)) 1
		mtu_restore $dev
	done

	log_test "Test minimum MTU configuration"
}
# Verify that IPv4 packets sized to the minimum MTU pass when all interfaces
# are configured with that MTU.
#
# Reported as an expected failure, without sending traffic, when the test
# interfaces do not advertise the same minimum MTU.
min_mtu_traffic_test()
{
	local min_mtu

	RET=0

	ensure_compatible_min_max_mtu "min_mtu" || {
		log_test_xfail "Topology has incompatible minimum MTU values"
		return
	}

	min_mtu=$(min_max_mtu_get_if ${NETIFS[p1]} "min_mtu")

	mtu_set_all_if $min_mtu
	mtu_test_ping4 $min_mtu 0
	# Do not test minimum MTU with IPv6, as IPv6 requires higher MTU.
	mtu_restore_all_if

	log_test "Test traffic, packet size is minimum MTU"
}
# Run cleanup whenever the script exits, so the topology is torn down even if
# setup or a test bails out early.
trap cleanup EXIT
# Build the topology, then hand over to helpers that are not defined in this
# file (setup_wait, tests_run).
setup_prepare
setup_wait
tests_run
# EXIT_STATUS is not assigned anywhere in this file. NOTE(review): presumably
# maintained by lib.sh; confirm there.
exit $EXIT_STATUS
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment