Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
linux
Commits
2d43f112
Commit
2d43f112
authored
Nov 05, 2005
by
Arnaldo Carvalho de Melo
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'red' of 84.73.165.173:/home/tgr/repos/net-2.6
parents
6df71634
bdc450a0
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
891 additions
and
771 deletions
+891
-771
include/linux/pkt_sched.h
include/linux/pkt_sched.h
+24
-26
include/net/inet_ecn.h
include/net/inet_ecn.h
+24
-4
include/net/red.h
include/net/red.h
+325
-0
net/sched/sch_gred.c
net/sched/sch_gred.c
+411
-430
net/sched/sch_red.c
net/sched/sch_red.c
+107
-311
No files found.
include/linux/pkt_sched.h
View file @
2d43f112
...
@@ -93,6 +93,7 @@ struct tc_fifo_qopt
...
@@ -93,6 +93,7 @@ struct tc_fifo_qopt
/* PRIO section */
/* PRIO section */
#define TCQ_PRIO_BANDS 16
#define TCQ_PRIO_BANDS 16
#define TCQ_MIN_PRIO_BANDS 2
struct
tc_prio_qopt
struct
tc_prio_qopt
{
{
...
@@ -169,6 +170,7 @@ struct tc_red_qopt
...
@@ -169,6 +170,7 @@ struct tc_red_qopt
unsigned
char
Scell_log
;
/* cell size for idle damping */
unsigned
char
Scell_log
;
/* cell size for idle damping */
unsigned
char
flags
;
unsigned
char
flags
;
#define TC_RED_ECN 1
#define TC_RED_ECN 1
#define TC_RED_HARDDROP 2
};
};
struct
tc_red_xstats
struct
tc_red_xstats
...
@@ -194,38 +196,34 @@ enum
...
@@ -194,38 +196,34 @@ enum
#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
#define TCA_SET_OFF TCA_GRED_PARMS
struct
tc_gred_qopt
struct
tc_gred_qopt
{
{
__u32
limit
;
/* HARD maximal queue length (bytes)
__u32
limit
;
/* HARD maximal queue length (bytes) */
*/
__u32
qth_min
;
/* Min average length threshold (bytes) */
__u32
qth_min
;
/* Min average length threshold (bytes)
__u32
qth_max
;
/* Max average length threshold (bytes) */
*/
__u32
DP
;
/* upto 2^32 DPs */
__u32
qth_max
;
/* Max average length threshold (bytes)
__u32
backlog
;
*/
__u32
qave
;
__u32
DP
;
/* upto 2^32 DPs */
__u32
forced
;
__u32
backlog
;
__u32
early
;
__u32
qave
;
__u32
other
;
__u32
forced
;
__u32
pdrop
;
__u32
early
;
__u8
Wlog
;
/* log(W) */
__u32
other
;
__u8
Plog
;
/* log(P_max/(qth_max-qth_min)) */
__u32
pdrop
;
__u8
Scell_log
;
/* cell size for idle damping */
__u8
prio
;
/* prio of this VQ */
unsigned
char
Wlog
;
/* log(W) */
__u32
packets
;
unsigned
char
Plog
;
/* log(P_max/(qth_max-qth_min)) */
__u32
bytesin
;
unsigned
char
Scell_log
;
/* cell size for idle damping */
__u8
prio
;
/* prio of this VQ */
__u32
packets
;
__u32
bytesin
;
};
};
/* gred setup */
/* gred setup */
struct
tc_gred_sopt
struct
tc_gred_sopt
{
{
__u32
DPs
;
__u32
DPs
;
__u32
def_DP
;
__u32
def_DP
;
__u8
grio
;
__u8
grio
;
__u8
pad1
;
__u8
flags
;
__u16
pad2
;
__u16
pad1
;
};
};
/* HTB section */
/* HTB section */
...
...
include/net/inet_ecn.h
View file @
2d43f112
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
#define _INET_ECN_H_
#define _INET_ECN_H_
#include <linux/ip.h>
#include <linux/ip.h>
#include <linux/skbuff.h>
#include <net/dsfield.h>
#include <net/dsfield.h>
enum
{
enum
{
...
@@ -48,7 +49,7 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
...
@@ -48,7 +49,7 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
(label) |= __constant_htons(INET_ECN_ECT_0 << 4); \
(label) |= __constant_htons(INET_ECN_ECT_0 << 4); \
} while (0)
} while (0)
static
inline
void
IP_ECN_set_ce
(
struct
iphdr
*
iph
)
static
inline
int
IP_ECN_set_ce
(
struct
iphdr
*
iph
)
{
{
u32
check
=
iph
->
check
;
u32
check
=
iph
->
check
;
u32
ecn
=
(
iph
->
tos
+
1
)
&
INET_ECN_MASK
;
u32
ecn
=
(
iph
->
tos
+
1
)
&
INET_ECN_MASK
;
...
@@ -61,7 +62,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph)
...
@@ -61,7 +62,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph)
* INET_ECN_CE => 00
* INET_ECN_CE => 00
*/
*/
if
(
!
(
ecn
&
2
))
if
(
!
(
ecn
&
2
))
return
;
return
!
ecn
;
/*
/*
* The following gives us:
* The following gives us:
...
@@ -72,6 +73,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph)
...
@@ -72,6 +73,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph)
iph
->
check
=
check
+
(
check
>=
0xFFFF
);
iph
->
check
=
check
+
(
check
>=
0xFFFF
);
iph
->
tos
|=
INET_ECN_CE
;
iph
->
tos
|=
INET_ECN_CE
;
return
1
;
}
}
static
inline
void
IP_ECN_clear
(
struct
iphdr
*
iph
)
static
inline
void
IP_ECN_clear
(
struct
iphdr
*
iph
)
...
@@ -87,11 +89,12 @@ static inline void ipv4_copy_dscp(struct iphdr *outer, struct iphdr *inner)
...
@@ -87,11 +89,12 @@ static inline void ipv4_copy_dscp(struct iphdr *outer, struct iphdr *inner)
struct
ipv6hdr
;
struct
ipv6hdr
;
static
inline
void
IP6_ECN_set_ce
(
struct
ipv6hdr
*
iph
)
static
inline
int
IP6_ECN_set_ce
(
struct
ipv6hdr
*
iph
)
{
{
if
(
INET_ECN_is_not_ect
(
ipv6_get_dsfield
(
iph
)))
if
(
INET_ECN_is_not_ect
(
ipv6_get_dsfield
(
iph
)))
return
;
return
0
;
*
(
u32
*
)
iph
|=
htonl
(
INET_ECN_CE
<<
20
);
*
(
u32
*
)
iph
|=
htonl
(
INET_ECN_CE
<<
20
);
return
1
;
}
}
static
inline
void
IP6_ECN_clear
(
struct
ipv6hdr
*
iph
)
static
inline
void
IP6_ECN_clear
(
struct
ipv6hdr
*
iph
)
...
@@ -105,4 +108,21 @@ static inline void ipv6_copy_dscp(struct ipv6hdr *outer, struct ipv6hdr *inner)
...
@@ -105,4 +108,21 @@ static inline void ipv6_copy_dscp(struct ipv6hdr *outer, struct ipv6hdr *inner)
ipv6_change_dsfield
(
inner
,
INET_ECN_MASK
,
dscp
);
ipv6_change_dsfield
(
inner
,
INET_ECN_MASK
,
dscp
);
}
}
/*
 * Try to set the ECN "congestion experienced" codepoint on a packet.
 *
 * Dispatches on skb->protocol to the IPv4 or IPv6 helper, but only when
 * the complete network header fits before skb->tail.  Returns the
 * helper's result in that case and 0 for every other packet (unknown
 * protocol or header not fully within the buffer).
 */
static inline int INET_ECN_set_ce(struct sk_buff *skb)
{
	if (skb->protocol == __constant_htons(ETH_P_IP)) {
		/* IPv4 header must lie entirely before the tail pointer */
		if (skb->nh.raw + sizeof(struct iphdr) > skb->tail)
			return 0;
		return IP_ECN_set_ce(skb->nh.iph);
	}

	if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
		/* same bounds check for the IPv6 header */
		if (skb->nh.raw + sizeof(struct ipv6hdr) > skb->tail)
			return 0;
		return IP6_ECN_set_ce(skb->nh.ipv6h);
	}

	return 0;
}
#endif
#endif
include/net/red.h
0 → 100644
View file @
2d43f112
#ifndef __NET_SCHED_RED_H
#define __NET_SCHED_RED_H
#include <linux/config.h>
#include <linux/types.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/dsfield.h>
/* Random Early Detection (RED) algorithm.
=======================================
Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.
This file codes a "divisionless" version of RED algorithm
as written down in Fig.17 of the paper.
Short description.
------------------
When a new packet arrives we calculate the average queue length:
avg = (1-W)*avg + W*current_queue_len,
W is the filter time constant (chosen as 2^(-Wlog)), it controls
the inertia of the algorithm. To allow larger bursts, W should be
decreased.
if (avg > th_max) -> packet marked (dropped).
if (avg < th_min) -> packet passes.
if (th_min < avg < th_max) we calculate probability:
Pb = max_P * (avg - th_min)/(th_max-th_min)
and mark (drop) packet with this probability.
Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
max_P should be small (not 1), usually 0.01..0.02 is good value.
max_P is chosen as a number such that max_P/(th_max-th_min)
is a negative power of two, so that the arithmetic contains
only shifts.
Parameters, settable by user:
-----------------------------
qth_min - bytes (should be < qth_max/2)
qth_max - bytes (should be at least 2*qth_min and less than limit)
Wlog - bits (<32) log(1/W).
Plog - bits (<32)
Plog is related to max_P by formula:
max_P = (qth_max-qth_min)/2^Plog;
F.e. if qth_max=128K and qth_min=32K, then Plog=22
corresponds to max_P=0.02
Scell_log
Stab
Lookup table for log((1-W)^(t/t_ave)).
NOTES:
Upper bound on W.
-----------------
If you want to allow bursts of L packets of size S,
you should choose W:
L + 1 - th_min/S < (1-(1-W)^L)/W
th_min/S = 32 th_min/S = 4
log(W) L
-1 33
-2 35
-3 39
-4 46
-5 57
-6 75
-7 101
-8 135
-9 190
etc.
*/
#define RED_STAB_SIZE 256
#define RED_STAB_MASK (RED_STAB_SIZE - 1)
/* Event counters kept alongside a RED queue. */
struct red_stats
{
	u32		prob_drop;	/* probabilistic (early) drops */
	u32		prob_mark;	/* probabilistic (early) marks */
	u32		forced_drop;	/* drops forced by qavg > max_thresh */
	u32		forced_mark;	/* marks forced by qavg > max_thresh */
	u32		pdrop;		/* drops caused by queue limits */
	u32		other;		/* drops caused by drop() calls */
	u32		backlog;
};
/*
 * Configuration and running state of one RED instance.
 * The first group is filled in by red_set_parms(); the second group is
 * updated while packets are processed.
 */
struct red_parms
{
	/* --- configuration --- */
	u32		qth_min;	/* lower avg-length threshold, scaled by Wlog */
	u32		qth_max;	/* upper avg-length threshold, scaled by Wlog */
	u32		Scell_max;	/* 255 << Scell_log, bound for the idle time */
	u32		Rmask;		/* cached random mask, see red_rmask */
	u8		Scell_log;	/* cell size for idle damping */
	u8		Wlog;		/* log(W) */
	u8		Plog;		/* number of random bits */
	u8		Stab[RED_STAB_SIZE];	/* idle-time decay lookup table */

	/* --- running state --- */
	int		qcount;		/* packets seen since the last random
					   number was generated */
	u32		qR;		/* cached random number */

	unsigned long	qavg;		/* average queue length, scaled by Wlog */
	psched_time_t	qidlestart;	/* when the current idle period began */
};
/*
 * red_rmask - build the mask applied to raw random numbers.
 * @Plog: number of low-order random bits to keep
 *
 * Returns a value with the low Plog bits set.  For Plog >= 32 every bit
 * of the u32 result is set.
 */
static inline u32 red_rmask(u8 Plog)
{
	/*
	 * Shift an unsigned one: with a plain (signed) 1, Plog == 31 would
	 * shift into the sign bit and overflow int, which is undefined.
	 */
	return Plog < 32 ? (((u32)1 << Plog) - 1) : ~(u32)0;
}
/*
 * red_set_parms - load a new configuration into a RED instance.
 * @p:         instance to configure
 * @qth_min:   lower threshold (stored shifted left by Wlog)
 * @qth_max:   upper threshold (stored shifted left by Wlog)
 * @Wlog:      log(W)
 * @Plog:      number of random bits; also determines Rmask
 * @Scell_log: cell size for idle damping
 * @stab:      source table; sizeof(p->Stab) bytes are copied from it
 *
 * The average queue length and the packet counter are reset as well:
 * qavg is strictly bound to these parameters, so keeping the old value
 * could leave an unreasonable average around for a while.
 */
static inline void red_set_parms(struct red_parms *p,
				 u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
				 u8 Scell_log, u8 *stab)
{
	/* plain copies of the log-valued parameters */
	p->Wlog		= Wlog;
	p->Plog		= Plog;
	p->Scell_log	= Scell_log;

	/* derived values */
	p->qth_min	= qth_min << Wlog;
	p->qth_max	= qth_max << Wlog;
	p->Rmask	= red_rmask(Plog);
	p->Scell_max	= (255 << Scell_log);
	memcpy(p->Stab, stab, sizeof(p->Stab));

	/* restart averaging from scratch */
	p->qavg		= 0;
	p->qcount	= -1;
}
/*
 * Returns 1 while an idle period is recorded in p->qidlestart, i.e. the
 * timestamp is not in the "past perfect" state, and 0 otherwise.
 */
static inline int red_is_idling(struct red_parms *p)
{
	if (PSCHED_IS_PASTPERFECT(p->qidlestart))
		return 0;

	return 1;
}
/*
 * Mark the beginning of an idle period by stamping p->qidlestart via
 * PSCHED_GET_TIME (presumably the current scheduler time -- the macro
 * is defined elsewhere).
 */
static inline void red_start_of_idle_period(struct red_parms *p)
{
	PSCHED_GET_TIME(p->qidlestart);
}
/*
 * Close the current idle period: p->qidlestart is put into the
 * "past perfect" state, which red_is_idling() reports as not idling.
 */
static inline void red_end_of_idle_period(struct red_parms *p)
{
	PSCHED_SET_PASTPERFECT(p->qidlestart);
}
/*
 * Bring a RED instance back to its initial running state: no average,
 * packet counter at -1 and no idle period pending.  The configured
 * parameters are left alone.
 */
static inline void red_restart(struct red_parms *p)
{
	p->qavg = 0;
	p->qcount = -1;
	red_end_of_idle_period(p);
}
/*
 * red_calc_qavg_from_idle_time - decay the average after an idle period.
 *
 * Ideally the average queue length would be recalculated at constant
 * clock intervals.  That is too expensive, so the calculation is driven
 * by packets, and the clock has to be modelled by hand while the queue
 * is idle.
 *
 * Floyd and Jacobson propose to "generate"
 *
 *	m = idletime / (average_pkt_size / bandwidth)
 *
 * dummy packets as a burst after the idle time, i.e.
 *
 *	qavg *= (1-W)^m
 *
 * which is why a precomputed table (Stab) is consulted here.
 *
 * Returns the decayed average; p itself is not modified.
 */
static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
{
	psched_time_t now;
	unsigned long half;
	long us_idle;
	long decay;
	int shift;

	PSCHED_GET_TIME(now);
	us_idle = PSCHED_TDIFF_SAFE(now, p->qidlestart, p->Scell_max);

	/* table lookup, indexed by idle time in units of 2^Scell_log */
	shift = p->Stab[(us_idle >> p->Scell_log) & RED_STAB_MASK];
	if (shift)
		return p->qavg >> shift;

	/*
	 * The table is too coarse for the initial part of the exponent,
	 * so approximate it with a linear function:
	 *
	 *	(1-W)^m ~= 1 - mW + ...
	 *
	 * and never decay by more than one half.
	 */
	decay = (p->qavg * us_idle) >> p->Scell_log;
	half = p->qavg >> 1;

	return decay < half ? p->qavg - decay : half;
}
/*
 * red_calc_qavg_no_idle_time - regular update of the average.
 * @p:       RED instance (only read)
 * @backlog: current queue length
 *
 * p->qavg is a fixed point number with the point at Wlog, so the
 * expression below is equivalent to the floating point form
 *
 *	qavg = qavg*(1-W) + backlog*W;
 *
 * Returns the new average; the caller stores it.
 */
static inline unsigned long red_calc_qavg_no_idle_time(struct red_parms *p,
						       unsigned int backlog)
{
	unsigned long weighted_avg = p->qavg >> p->Wlog;

	return p->qavg + (backlog - weighted_avg);
}
/*
 * red_calc_qavg - compute the next average queue length.
 *
 * Uses the idle-time decay while an idle period is pending and the
 * regular backlog-driven update otherwise.  Returns the new value
 * without storing it.
 */
static inline unsigned long red_calc_qavg(struct red_parms *p,
					  unsigned int backlog)
{
	if (red_is_idling(p))
		return red_calc_qavg_from_idle_time(p);

	return red_calc_qavg_no_idle_time(p, backlog);
}
/* Draw a random number and limit it to the bits selected by p->Rmask. */
static inline u32 red_random(struct red_parms *p)
{
	return p->Rmask & net_random();
}
/*
 * red_mark_probability - decide whether this packet is hit by the
 * probabilistic mark.
 *
 * qR is a random number masked with Rmask (see red_random).  Since
 * qavg has its fixed point at Wlog and Plog is related to max_P by
 *
 *	max_P = (qth_max-qth_min)/2^Plog
 *
 * the integer test below has the floating point equivalent
 *
 *	max_P*(qavg - qth_min)/(qth_max-qth_min) < rnd/qcount
 *
 * for the "do not mark" case.  Returns non-zero when the packet should
 * be marked.
 */
static inline int red_mark_probability(struct red_parms *p, unsigned long qavg)
{
	unsigned long excess = (qavg - p->qth_min) >> p->Wlog;
	unsigned long weight = excess * p->qcount;

	return weight >= p->qR;
}
/* Where the average queue length sits relative to the two thresholds. */
enum {
	RED_BELOW_MIN_THRESH,	/* qavg < qth_min */
	RED_BETWEEN_TRESH,	/* qth_min <= qavg < qth_max */
	RED_ABOVE_MAX_TRESH,	/* qavg >= qth_max */
};
/*
 * red_cmp_thresh - classify an average queue length.
 *
 * Returns RED_BELOW_MIN_THRESH for qavg < qth_min, RED_ABOVE_MAX_TRESH
 * for qavg >= qth_max and RED_BETWEEN_TRESH for everything in between.
 */
static inline int red_cmp_thresh(struct red_parms *p, unsigned long qavg)
{
	if (qavg < p->qth_min)
		return RED_BELOW_MIN_THRESH;

	if (qavg >= p->qth_max)
		return RED_ABOVE_MAX_TRESH;

	return RED_BETWEEN_TRESH;
}
/* Verdicts returned by red_action(). */
enum {
	RED_DONT_MARK,	/* let the packet pass untouched */
	RED_PROB_MARK,	/* hit by the probabilistic mark */
	RED_HARD_MARK,	/* average is at or above the upper threshold */
};
/*
 * red_action - run the RED decision for one packet.
 * @p:    RED instance; qcount and qR are updated as a side effect
 * @qavg: average queue length to judge
 *
 * Returns RED_DONT_MARK, RED_PROB_MARK or RED_HARD_MARK.
 */
static inline int red_action(struct red_parms *p, unsigned long qavg)
{
	switch (red_cmp_thresh(p, qavg)) {
	case RED_BELOW_MIN_THRESH:
		p->qcount = -1;
		return RED_DONT_MARK;

	case RED_ABOVE_MAX_TRESH:
		p->qcount = -1;
		return RED_HARD_MARK;

	case RED_BETWEEN_TRESH:
		p->qcount++;

		/* counter just left -1: only draw a fresh random number */
		if (p->qcount == 0) {
			p->qR = red_random(p);
			return RED_DONT_MARK;
		}

		if (!red_mark_probability(p, qavg))
			return RED_DONT_MARK;

		/* marked: restart the count with a new random number */
		p->qcount = 0;
		p->qR = red_random(p);
		return RED_PROB_MARK;
	}

	/* red_cmp_thresh() returned something unexpected */
	BUG();
	return RED_DONT_MARK;
}
#endif
net/sched/sch_gred.c
View file @
2d43f112
...
@@ -15,247 +15,281 @@
...
@@ -15,247 +15,281 @@
* from Ren Liu
* from Ren Liu
* - More error checks
* - More error checks
*
*
*
* For all the glorious comments look at include/net/red.h
*
* For all the glorious comments look at Alexey's sch_red.c
*/
*/
#include <linux/config.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/pkt_sched.h>
#include <net/red.h>
#if 1
/* control */
#define GRED_DEF_PRIO (MAX_DPs / 2)
#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
#define GRED_VQ_MASK (MAX_DPs - 1)
#else
#define DPRINTK(format,args...)
#endif
#if 0 /* data */
#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define D2PRINTK(format,args...)
#endif
struct
gred_sched_data
;
struct
gred_sched_data
;
struct
gred_sched
;
struct
gred_sched
;
struct
gred_sched_data
struct
gred_sched_data
{
{
/* Parameters */
u32
limit
;
/* HARD maximal queue length */
u32
limit
;
/* HARD maximal queue length */
u32
qth_min
;
/* Min average length threshold: A scaled */
u32
qth_max
;
/* Max average length threshold: A scaled */
u32
DP
;
/* the drop pramaters */
u32
DP
;
/* the drop pramaters */
char
Wlog
;
/* log(W) */
char
Plog
;
/* random number bits */
u32
Scell_max
;
u32
Rmask
;
u32
bytesin
;
/* bytes seen on virtualQ so far*/
u32
bytesin
;
/* bytes seen on virtualQ so far*/
u32
packetsin
;
/* packets seen on virtualQ so far*/
u32
packetsin
;
/* packets seen on virtualQ so far*/
u32
backlog
;
/* bytes on the virtualQ */
u32
backlog
;
/* bytes on the virtualQ */
u32
forced
;
/* packets dropped for exceeding limits */
u8
prio
;
/* the prio of this vq */
u32
early
;
/* packets dropped as a warning */
u32
other
;
/* packets dropped by invoking drop() */
struct
red_parms
parms
;
u32
pdrop
;
/* packets dropped because we exceeded physical queue limits */
struct
red_stats
stats
;
char
Scell_log
;
};
u8
Stab
[
256
];
u8
prio
;
/* the prio of this vq */
enum
{
GRED_WRED_MODE
=
1
,
/* Variables */
GRED_RIO_MODE
,
unsigned
long
qave
;
/* Average queue length: A scaled */
int
qcount
;
/* Packets since last random number generation */
u32
qR
;
/* Cached random number */
psched_time_t
qidlestart
;
/* Start of idle period */
};
};
struct
gred_sched
struct
gred_sched
{
{
struct
gred_sched_data
*
tab
[
MAX_DPs
];
struct
gred_sched_data
*
tab
[
MAX_DPs
];
u
32
DPs
;
u
nsigned
long
flags
;
u32
def
;
u32
red_flags
;
u
8
initd
;
u
32
DPs
;
u
8
grio
;
u
32
def
;
u8
eqp
;
struct
red_parms
wred_set
;
};
};
static
int
static
inline
int
gred_wred_mode
(
struct
gred_sched
*
table
)
gred_enqueue
(
struct
sk_buff
*
skb
,
struct
Qdisc
*
sch
)
{
{
psched_time_t
now
;
return
test_bit
(
GRED_WRED_MODE
,
&
table
->
flags
);
struct
gred_sched_data
*
q
=
NULL
;
}
struct
gred_sched
*
t
=
qdisc_priv
(
sch
);
unsigned
long
qave
=
0
;
static
inline
void
gred_enable_wred_mode
(
struct
gred_sched
*
table
)
int
i
=
0
;
{
__set_bit
(
GRED_WRED_MODE
,
&
table
->
flags
);
}
/* Clear the GRED_WRED_MODE bit in the table's flag word. */
static inline void gred_disable_wred_mode(struct gred_sched *table)
{
	unsigned long *mode_bits = &table->flags;

	__clear_bit(GRED_WRED_MODE, mode_bits);
}
/* Report whether the GRED_RIO_MODE bit is set in the table's flag word. */
static inline int gred_rio_mode(struct gred_sched *table)
{
	unsigned long *mode_bits = &table->flags;

	return test_bit(GRED_RIO_MODE, mode_bits);
}
/* Set the GRED_RIO_MODE bit in the table's flag word. */
static inline void gred_enable_rio_mode(struct gred_sched *table)
{
	unsigned long *mode_bits = &table->flags;

	__set_bit(GRED_RIO_MODE, mode_bits);
}
/* Clear the GRED_RIO_MODE bit in the table's flag word. */
static inline void gred_disable_rio_mode(struct gred_sched *table)
{
	unsigned long *mode_bits = &table->flags;

	__clear_bit(GRED_RIO_MODE, mode_bits);
}
static
inline
int
gred_wred_mode_check
(
struct
Qdisc
*
sch
)
{
struct
gred_sched
*
table
=
qdisc_priv
(
sch
);
int
i
;
if
(
!
t
->
initd
&&
skb_queue_len
(
&
sch
->
q
)
<
(
sch
->
dev
->
tx_queue_len
?
:
1
))
{
/* Really ugly O(n^2) but shouldn't be necessary too frequent. */
D2PRINTK
(
"NO GRED Queues setup yet! Enqueued anyway
\n
"
);
for
(
i
=
0
;
i
<
table
->
DPs
;
i
++
)
{
goto
do_enqueue
;
struct
gred_sched_data
*
q
=
table
->
tab
[
i
];
int
n
;
if
(
q
==
NULL
)
continue
;
for
(
n
=
0
;
n
<
table
->
DPs
;
n
++
)
if
(
table
->
tab
[
n
]
&&
table
->
tab
[
n
]
!=
q
&&
table
->
tab
[
n
]
->
prio
==
q
->
prio
)
return
1
;
}
}
return
0
;
}
/*
 * Pick the backlog that feeds the average calculation: the backlog of
 * the whole qdisc when the table is in WRED mode, otherwise the backlog
 * of the given virtual queue.
 */
static inline unsigned int gred_backlog(struct gred_sched *table,
					struct gred_sched_data *q,
					struct Qdisc *sch)
{
	return gred_wred_mode(table) ? sch->qstats.backlog : q->backlog;
}
/* Extract the virtual queue (DP) number from the skb's tc_index. */
static inline u16 tc_index_to_dp(struct sk_buff *skb)
{
	const u16 dp = skb->tc_index & GRED_VQ_MASK;

	return dp;
}
static
inline
void
gred_load_wred_set
(
struct
gred_sched
*
table
,
struct
gred_sched_data
*
q
)
{
q
->
parms
.
qavg
=
table
->
wred_set
.
qavg
;
q
->
parms
.
qidlestart
=
table
->
wred_set
.
qidlestart
;
}
static
inline
void
gred_store_wred_set
(
struct
gred_sched
*
table
,
struct
gred_sched_data
*
q
)
{
table
->
wred_set
.
qavg
=
q
->
parms
.
qavg
;
}
static
inline
int
gred_use_ecn
(
struct
gred_sched
*
t
)
{
return
t
->
red_flags
&
TC_RED_ECN
;
}
if
(
((
skb
->
tc_index
&
0xf
)
>
(
t
->
DPs
-
1
))
||
!
(
q
=
t
->
tab
[
skb
->
tc_index
&
0xf
]))
{
static
inline
int
gred_use_harddrop
(
struct
gred_sched
*
t
)
printk
(
"GRED: setting to default (%d)
\n
"
,
t
->
def
);
{
if
(
!
(
q
=
t
->
tab
[
t
->
def
]))
{
return
t
->
red_flags
&
TC_RED_HARDDROP
;
DPRINTK
(
"GRED: setting to default FAILED! dropping!! "
}
"(%d)
\n
"
,
t
->
def
);
goto
drop
;
static
int
gred_enqueue
(
struct
sk_buff
*
skb
,
struct
Qdisc
*
sch
)
{
struct
gred_sched_data
*
q
=
NULL
;
struct
gred_sched
*
t
=
qdisc_priv
(
sch
);
unsigned
long
qavg
=
0
;
u16
dp
=
tc_index_to_dp
(
skb
);
if
(
dp
>=
t
->
DPs
||
(
q
=
t
->
tab
[
dp
])
==
NULL
)
{
dp
=
t
->
def
;
if
((
q
=
t
->
tab
[
dp
])
==
NULL
)
{
/* Pass through packets not assigned to a DP
* if no default DP has been configured. This
* allows for DP flows to be left untouched.
*/
if
(
skb_queue_len
(
&
sch
->
q
)
<
sch
->
dev
->
tx_queue_len
)
return
qdisc_enqueue_tail
(
skb
,
sch
);
else
goto
drop
;
}
}
/* fix tc_index? --could be controvesial but needed for
/* fix tc_index? --could be controvesial but needed for
requeueing */
requeueing */
skb
->
tc_index
=
(
skb
->
tc_index
&
0xfffffff0
)
|
t
->
def
;
skb
->
tc_index
=
(
skb
->
tc_index
&
~
GRED_VQ_MASK
)
|
dp
;
}
}
D2PRINTK
(
"gred_enqueue virtualQ 0x%x classid %x backlog %d "
/* sum up all the qaves of prios <= to ours to get the new qave */
"general backlog %d
\n
"
,
skb
->
tc_index
&
0xf
,
sch
->
handle
,
q
->
backlog
,
if
(
!
gred_wred_mode
(
t
)
&&
gred_rio_mode
(
t
))
{
sch
->
qstats
.
backlog
);
int
i
;
/* sum up all the qaves of prios <= to ours to get the new qave*/
if
(
!
t
->
eqp
&&
t
->
grio
)
{
for
(
i
=
0
;
i
<
t
->
DPs
;
i
++
)
{
for
(
i
=
0
;
i
<
t
->
DPs
;
i
++
)
{
if
(
t
->
tab
[
i
]
&&
t
->
tab
[
i
]
->
prio
<
q
->
prio
&&
if
((
!
t
->
tab
[
i
])
||
(
i
==
q
->
DP
))
!
red_is_idling
(
&
t
->
tab
[
i
]
->
parms
))
continue
;
qavg
+=
t
->
tab
[
i
]
->
parms
.
qavg
;
if
((
t
->
tab
[
i
]
->
prio
<
q
->
prio
)
&&
(
PSCHED_IS_PASTPERFECT
(
t
->
tab
[
i
]
->
qidlestart
)))
qave
+=
t
->
tab
[
i
]
->
qave
;
}
}
}
}
q
->
packetsin
++
;
q
->
packetsin
++
;
q
->
bytesin
+=
skb
->
len
;
q
->
bytesin
+=
skb
->
len
;
if
(
t
->
eqp
&&
t
->
grio
)
{
if
(
gred_wred_mode
(
t
))
qave
=
0
;
gred_load_wred_set
(
t
,
q
);
q
->
qave
=
t
->
tab
[
t
->
def
]
->
qave
;
q
->
qidlestart
=
t
->
tab
[
t
->
def
]
->
qidlestart
;
}
if
(
!
PSCHED_IS_PASTPERFECT
(
q
->
qidlestart
))
{
q
->
parms
.
qavg
=
red_calc_qavg
(
&
q
->
parms
,
gred_backlog
(
t
,
q
,
sch
));
long
us_idle
;
PSCHED_GET_TIME
(
now
);
us_idle
=
PSCHED_TDIFF_SAFE
(
now
,
q
->
qidlestart
,
q
->
Scell_max
);
PSCHED_SET_PASTPERFECT
(
q
->
qidlestart
);
q
->
qave
>>=
q
->
Stab
[(
us_idle
>>
q
->
Scell_log
)
&
0xFF
];
if
(
red_is_idling
(
&
q
->
parms
))
}
else
{
red_end_of_idle_period
(
&
q
->
parms
);
if
(
t
->
eqp
)
{
q
->
qave
+=
sch
->
qstats
.
backlog
-
(
q
->
qave
>>
q
->
Wlog
);
}
else
{
q
->
qave
+=
q
->
backlog
-
(
q
->
qave
>>
q
->
Wlog
);
}
}
if
(
gred_wred_mode
(
t
))
gred_store_wred_set
(
t
,
q
);
if
(
t
->
eqp
&&
t
->
grio
)
t
->
tab
[
t
->
def
]
->
qave
=
q
->
qave
;
if
((
q
->
qave
+
qave
)
<
q
->
qth_min
)
{
q
->
qcount
=
-
1
;
enqueue:
if
(
q
->
backlog
+
skb
->
len
<=
q
->
limit
)
{
q
->
backlog
+=
skb
->
len
;
do_enqueue:
__skb_queue_tail
(
&
sch
->
q
,
skb
);
sch
->
qstats
.
backlog
+=
skb
->
len
;
sch
->
bstats
.
bytes
+=
skb
->
len
;
sch
->
bstats
.
packets
++
;
return
0
;
}
else
{
q
->
pdrop
++
;
}
drop:
switch
(
red_action
(
&
q
->
parms
,
q
->
parms
.
qavg
+
qavg
))
{
kfree_skb
(
skb
);
case
RED_DONT_MARK
:
sch
->
qstats
.
drops
++
;
break
;
return
NET_XMIT_DROP
;
}
case
RED_PROB_MARK
:
if
((
q
->
qave
+
qave
)
>=
q
->
qth_max
)
{
sch
->
qstats
.
overlimits
++
;
q
->
qcount
=
-
1
;
if
(
!
gred_use_ecn
(
t
)
||
!
INET_ECN_set_ce
(
skb
))
{
sch
->
qstats
.
overlimits
++
;
q
->
stats
.
prob_drop
++
;
q
->
forced
++
;
goto
congestion_drop
;
goto
drop
;
}
q
->
stats
.
prob_mark
++
;
break
;
case
RED_HARD_MARK
:
sch
->
qstats
.
overlimits
++
;
if
(
gred_use_harddrop
(
t
)
||
!
gred_use_ecn
(
t
)
||
!
INET_ECN_set_ce
(
skb
))
{
q
->
stats
.
forced_drop
++
;
goto
congestion_drop
;
}
q
->
stats
.
forced_mark
++
;
break
;
}
}
if
(
++
q
->
qcount
)
{
if
((((
qave
+
q
->
qave
)
-
q
->
qth_min
)
>>
q
->
Wlog
)
*
q
->
qcount
<
q
->
qR
)
if
(
q
->
backlog
+
skb
->
len
<=
q
->
limit
)
{
goto
enqueue
;
q
->
backlog
+=
skb
->
len
;
q
->
qcount
=
0
;
return
qdisc_enqueue_tail
(
skb
,
sch
);
q
->
qR
=
net_random
()
&
q
->
Rmask
;
sch
->
qstats
.
overlimits
++
;
q
->
early
++
;
goto
drop
;
}
}
q
->
qR
=
net_random
()
&
q
->
Rmask
;
goto
enqueue
;
q
->
stats
.
pdrop
++
;
drop:
return
qdisc_drop
(
skb
,
sch
);
congestion_drop:
qdisc_drop
(
skb
,
sch
);
return
NET_XMIT_CN
;
}
}
static
int
static
int
gred_requeue
(
struct
sk_buff
*
skb
,
struct
Qdisc
*
sch
)
gred_requeue
(
struct
sk_buff
*
skb
,
struct
Qdisc
*
sch
)
{
{
struct
gred_sched
*
t
=
qdisc_priv
(
sch
);
struct
gred_sched_data
*
q
;
struct
gred_sched_data
*
q
;
struct
gred_sched
*
t
=
qdisc_priv
(
sch
);
u16
dp
=
tc_index_to_dp
(
skb
);
q
=
t
->
tab
[(
skb
->
tc_index
&
0xf
)];
/* error checking here -- probably unnecessary */
if
(
dp
>=
t
->
DPs
||
(
q
=
t
->
tab
[
dp
])
==
NULL
)
{
PSCHED_SET_PASTPERFECT
(
q
->
qidlestart
);
if
(
net_ratelimit
())
printk
(
KERN_WARNING
"GRED: Unable to relocate VQ 0x%x "
__skb_queue_head
(
&
sch
->
q
,
skb
);
"for requeue, screwing up backlog.
\n
"
,
sch
->
qstats
.
backlog
+=
skb
->
len
;
tc_index_to_dp
(
skb
));
sch
->
qstats
.
requeues
++
;
}
else
{
q
->
backlog
+=
skb
->
len
;
if
(
red_is_idling
(
&
q
->
parms
))
return
0
;
red_end_of_idle_period
(
&
q
->
parms
);
q
->
backlog
+=
skb
->
len
;
}
return
qdisc_requeue
(
skb
,
sch
);
}
}
static
struct
sk_buff
*
static
struct
sk_buff
*
gred_dequeue
(
struct
Qdisc
*
sch
)
gred_dequeue
(
struct
Qdisc
*
sch
)
{
{
struct
sk_buff
*
skb
;
struct
sk_buff
*
skb
;
struct
gred_sched_data
*
q
;
struct
gred_sched
*
t
=
qdisc_priv
(
sch
);
struct
gred_sched
*
t
=
qdisc_priv
(
sch
);
skb
=
qdisc_dequeue_head
(
sch
);
skb
=
__skb_dequeue
(
&
sch
->
q
);
if
(
skb
)
{
if
(
skb
)
{
sch
->
qstats
.
backlog
-=
skb
->
len
;
struct
gred_sched_data
*
q
;
q
=
t
->
tab
[(
skb
->
tc_index
&
0xf
)];
u16
dp
=
tc_index_to_dp
(
skb
);
if
(
q
)
{
q
->
backlog
-=
skb
->
len
;
if
(
dp
>=
t
->
DPs
||
(
q
=
t
->
tab
[
dp
])
==
NULL
)
{
if
(
!
q
->
backlog
&&
!
t
->
eqp
)
if
(
net_ratelimit
())
PSCHED_GET_TIME
(
q
->
qidlestart
);
printk
(
KERN_WARNING
"GRED: Unable to relocate "
"VQ 0x%x after dequeue, screwing up "
"backlog.
\n
"
,
tc_index_to_dp
(
skb
));
}
else
{
}
else
{
D2PRINTK
(
"gred_dequeue: skb has bad tcindex %x
\n
"
,
skb
->
tc_index
&
0xf
);
q
->
backlog
-=
skb
->
len
;
if
(
!
q
->
backlog
&&
!
gred_wred_mode
(
t
))
red_start_of_idle_period
(
&
q
->
parms
);
}
}
return
skb
;
return
skb
;
}
}
if
(
t
->
eqp
)
{
if
(
gred_wred_mode
(
t
)
&&
!
red_is_idling
(
&
t
->
wred_set
))
q
=
t
->
tab
[
t
->
def
];
red_start_of_idle_period
(
&
t
->
wred_set
);
if
(
!
q
)
D2PRINTK
(
"no default VQ set: Results will be "
"screwed up
\n
"
);
else
PSCHED_GET_TIME
(
q
->
qidlestart
);
}
return
NULL
;
return
NULL
;
}
}
...
@@ -263,36 +297,34 @@ gred_dequeue(struct Qdisc* sch)
...
@@ -263,36 +297,34 @@ gred_dequeue(struct Qdisc* sch)
static
unsigned
int
gred_drop
(
struct
Qdisc
*
sch
)
static
unsigned
int
gred_drop
(
struct
Qdisc
*
sch
)
{
{
struct
sk_buff
*
skb
;
struct
sk_buff
*
skb
;
struct
gred_sched
*
t
=
qdisc_priv
(
sch
);
struct
gred_sched_data
*
q
;
skb
=
qdisc_dequeue_tail
(
sch
);
struct
gred_sched
*
t
=
qdisc_priv
(
sch
);
skb
=
__skb_dequeue_tail
(
&
sch
->
q
);
if
(
skb
)
{
if
(
skb
)
{
unsigned
int
len
=
skb
->
len
;
unsigned
int
len
=
skb
->
len
;
s
ch
->
qstats
.
backlog
-=
len
;
s
truct
gred_sched_data
*
q
;
sch
->
qstats
.
drops
++
;
u16
dp
=
tc_index_to_dp
(
skb
)
;
q
=
t
->
tab
[(
skb
->
tc_index
&
0xf
)];
if
(
q
)
{
if
(
dp
>=
t
->
DPs
||
(
q
=
t
->
tab
[
dp
])
==
NULL
)
{
q
->
backlog
-=
len
;
if
(
net_ratelimit
())
q
->
other
++
;
printk
(
KERN_WARNING
"GRED: Unable to relocate "
if
(
!
q
->
backlog
&&
!
t
->
eqp
)
"VQ 0x%x while dropping, screwing up "
PSCHED_GET_TIME
(
q
->
qidlestart
);
"backlog.
\n
"
,
tc_index_to_dp
(
skb
)
);
}
else
{
}
else
{
D2PRINTK
(
"gred_dequeue: skb has bad tcindex %x
\n
"
,
skb
->
tc_index
&
0xf
);
q
->
backlog
-=
len
;
q
->
stats
.
other
++
;
if
(
!
q
->
backlog
&&
!
gred_wred_mode
(
t
))
red_start_of_idle_period
(
&
q
->
parms
);
}
}
kfree_skb
(
skb
);
qdisc_drop
(
skb
,
sch
);
return
len
;
return
len
;
}
}
q
=
t
->
tab
[
t
->
def
];
if
(
gred_wred_mode
(
t
)
&&
!
red_is_idling
(
&
t
->
wred_set
))
if
(
!
q
)
{
red_start_of_idle_period
(
&
t
->
wred_set
);
D2PRINTK
(
"no default VQ set: Results might be screwed up
\n
"
);
return
0
;
}
PSCHED_GET_TIME
(
q
->
qidlestart
);
return
0
;
return
0
;
}
}
...
@@ -300,293 +332,241 @@ static unsigned int gred_drop(struct Qdisc* sch)
...
@@ -300,293 +332,241 @@ static unsigned int gred_drop(struct Qdisc* sch)
static
void
gred_reset
(
struct
Qdisc
*
sch
)
static
void
gred_reset
(
struct
Qdisc
*
sch
)
{
{
int
i
;
int
i
;
struct
gred_sched_data
*
q
;
struct
gred_sched
*
t
=
qdisc_priv
(
sch
);
struct
gred_sched
*
t
=
qdisc_priv
(
sch
);
qdisc_reset_queue
(
sch
);
__skb_queue_purge
(
&
sch
->
q
);
for
(
i
=
0
;
i
<
t
->
DPs
;
i
++
)
{
struct
gred_sched_data
*
q
=
t
->
tab
[
i
];
sch
->
qstats
.
backlog
=
0
;
if
(
!
q
)
continue
;
for
(
i
=
0
;
i
<
t
->
DPs
;
i
++
)
{
red_restart
(
&
q
->
parms
);
q
=
t
->
tab
[
i
];
if
(
!
q
)
continue
;
PSCHED_SET_PASTPERFECT
(
q
->
qidlestart
);
q
->
qave
=
0
;
q
->
qcount
=
-
1
;
q
->
backlog
=
0
;
q
->
backlog
=
0
;
q
->
other
=
0
;
q
->
forced
=
0
;
q
->
pdrop
=
0
;
q
->
early
=
0
;
}
}
}
}
static
int
gred_change
(
struct
Qdisc
*
sch
,
struct
rtattr
*
opt
)
static
inline
void
gred_destroy_vq
(
struct
gred_sched_data
*
q
)
{
kfree
(
q
);
}
static
inline
int
gred_change_table_def
(
struct
Qdisc
*
sch
,
struct
rtattr
*
dps
)
{
{
struct
gred_sched
*
table
=
qdisc_priv
(
sch
);
struct
gred_sched
*
table
=
qdisc_priv
(
sch
);
struct
gred_sched_data
*
q
;
struct
tc_gred_qopt
*
ctl
;
struct
tc_gred_sopt
*
sopt
;
struct
tc_gred_sopt
*
sopt
;
struct
rtattr
*
tb
[
TCA_GRED_STAB
];
struct
rtattr
*
tb2
[
TCA_GRED_DPS
];
int
i
;
int
i
;
if
(
opt
==
NULL
||
rtattr_parse_nested
(
tb
,
TCA_GRED_STAB
,
opt
))
if
(
dps
==
NULL
||
RTA_PAYLOAD
(
dps
)
<
sizeof
(
*
s
opt
))
return
-
EINVAL
;
return
-
EINVAL
;
if
(
tb
[
TCA_GRED_PARMS
-
1
]
==
0
&&
tb
[
TCA_GRED_STAB
-
1
]
==
0
)
{
sopt
=
RTA_DATA
(
dps
);
rtattr_parse_nested
(
tb2
,
TCA_GRED_DPS
,
opt
);
if
(
sopt
->
DPs
>
MAX_DPs
||
sopt
->
DPs
==
0
||
sopt
->
def_DP
>=
sopt
->
DPs
)
return
-
EINVAL
;
if
(
tb2
[
TCA_GRED_DPS
-
1
]
==
0
)
sch_tree_lock
(
sch
);
return
-
EINVAL
;
table
->
DPs
=
sopt
->
DPs
;
table
->
def
=
sopt
->
def_DP
;
table
->
red_flags
=
sopt
->
flags
;
/*
* Every entry point to GRED is synchronized with the above code
* and the DP is checked against DPs, i.e. shadowed VQs can no
* longer be found so we can unlock right here.
*/
sch_tree_unlock
(
sch
);
if
(
sopt
->
grio
)
{
gred_enable_rio_mode
(
table
);
gred_disable_wred_mode
(
table
);
if
(
gred_wred_mode_check
(
sch
))
gred_enable_wred_mode
(
table
);
}
else
{
gred_disable_rio_mode
(
table
);
gred_disable_wred_mode
(
table
);
}
sopt
=
RTA_DATA
(
tb2
[
TCA_GRED_DPS
-
1
]);
for
(
i
=
table
->
DPs
;
i
<
MAX_DPs
;
i
++
)
{
table
->
DPs
=
sopt
->
DPs
;
if
(
table
->
tab
[
i
])
{
table
->
def
=
sopt
->
def_DP
;
printk
(
KERN_WARNING
"GRED: Warning: Destroying "
table
->
grio
=
sopt
->
grio
;
"shadowed VQ 0x%x
\n
"
,
i
);
table
->
initd
=
0
;
gred_destroy_vq
(
table
->
tab
[
i
])
;
/* probably need to clear all the table DP entries as well */
table
->
tab
[
i
]
=
NULL
;
return
0
;
}
}
}
return
0
;
}
if
(
!
table
->
DPs
||
tb
[
TCA_GRED_PARMS
-
1
]
==
0
||
tb
[
TCA_GRED_STAB
-
1
]
==
0
||
static
inline
int
gred_change_vq
(
struct
Qdisc
*
sch
,
int
dp
,
RTA_PAYLOAD
(
tb
[
TCA_GRED_PARMS
-
1
])
<
sizeof
(
*
ctl
)
||
struct
tc_gred_qopt
*
ctl
,
int
prio
,
u8
*
stab
)
RTA_PAYLOAD
(
tb
[
TCA_GRED_STAB
-
1
])
<
256
)
{
return
-
EINVAL
;
struct
gred_sched
*
table
=
qdisc_priv
(
sch
);
struct
gred_sched_data
*
q
;
ctl
=
RTA_DATA
(
tb
[
TCA_GRED_PARMS
-
1
]);
if
(
table
->
tab
[
dp
]
==
NULL
)
{
if
(
ctl
->
DP
>
MAX_DPs
-
1
)
{
table
->
tab
[
dp
]
=
kmalloc
(
sizeof
(
*
q
),
GFP_KERNEL
);
/* misbehaving is punished! Put in the default drop probability */
if
(
table
->
tab
[
dp
]
==
NULL
)
DPRINTK
(
"
\n
GRED: DP %u not in the proper range fixed. New DP "
"set to default at %d
\n
"
,
ctl
->
DP
,
table
->
def
);
ctl
->
DP
=
table
->
def
;
}
if
(
table
->
tab
[
ctl
->
DP
]
==
NULL
)
{
table
->
tab
[
ctl
->
DP
]
=
kmalloc
(
sizeof
(
struct
gred_sched_data
),
GFP_KERNEL
);
if
(
NULL
==
table
->
tab
[
ctl
->
DP
])
return
-
ENOMEM
;
return
-
ENOMEM
;
memset
(
table
->
tab
[
ctl
->
DP
],
0
,
(
sizeof
(
struct
gred_sched_data
)));
memset
(
table
->
tab
[
dp
],
0
,
sizeof
(
*
q
));
}
q
=
table
->
tab
[
ctl
->
DP
];
if
(
table
->
grio
)
{
if
(
ctl
->
prio
<=
0
)
{
if
(
table
->
def
&&
table
->
tab
[
table
->
def
])
{
DPRINTK
(
"
\n
GRED: DP %u does not have a prio"
"setting default to %d
\n
"
,
ctl
->
DP
,
table
->
tab
[
table
->
def
]
->
prio
);
q
->
prio
=
table
->
tab
[
table
->
def
]
->
prio
;
}
else
{
DPRINTK
(
"
\n
GRED: DP %u does not have a prio"
" setting default to 8
\n
"
,
ctl
->
DP
);
q
->
prio
=
8
;
}
}
else
{
q
->
prio
=
ctl
->
prio
;
}
}
else
{
q
->
prio
=
8
;
}
}
q
=
table
->
tab
[
dp
];
q
->
DP
=
ctl
->
DP
;
q
->
DP
=
dp
;
q
->
Wlog
=
ctl
->
Wlog
;
q
->
prio
=
prio
;
q
->
Plog
=
ctl
->
Plog
;
q
->
limit
=
ctl
->
limit
;
q
->
limit
=
ctl
->
limit
;
q
->
Scell_log
=
ctl
->
Scell_log
;
q
->
Rmask
=
ctl
->
Plog
<
32
?
((
1
<<
ctl
->
Plog
)
-
1
)
:
~
0UL
;
q
->
Scell_max
=
(
255
<<
q
->
Scell_log
);
q
->
qth_min
=
ctl
->
qth_min
<<
ctl
->
Wlog
;
q
->
qth_max
=
ctl
->
qth_max
<<
ctl
->
Wlog
;
q
->
qave
=
0
;
q
->
backlog
=
0
;
q
->
qcount
=
-
1
;
q
->
other
=
0
;
q
->
forced
=
0
;
q
->
pdrop
=
0
;
q
->
early
=
0
;
PSCHED_SET_PASTPERFECT
(
q
->
qidlestart
);
memcpy
(
q
->
Stab
,
RTA_DATA
(
tb
[
TCA_GRED_STAB
-
1
]),
256
);
if
(
table
->
initd
&&
table
->
grio
)
{
/* this looks ugly but it's not in the fast path */
for
(
i
=
0
;
i
<
table
->
DPs
;
i
++
)
{
if
((
!
table
->
tab
[
i
])
||
(
i
==
q
->
DP
)
)
continue
;
if
(
table
->
tab
[
i
]
->
prio
==
q
->
prio
){
/* WRED mode detected */
table
->
eqp
=
1
;
break
;
}
}
}
if
(
!
table
->
initd
)
{
if
(
q
->
backlog
==
0
)
table
->
initd
=
1
;
red_end_of_idle_period
(
&
q
->
parms
);
/*
the first entry also goes into the default until
over-written
*/
if
(
table
->
tab
[
table
->
def
]
==
NULL
)
{
table
->
tab
[
table
->
def
]
=
kmalloc
(
sizeof
(
struct
gred_sched_data
),
GFP_KERNEL
);
if
(
NULL
==
table
->
tab
[
table
->
def
])
return
-
ENOMEM
;
memset
(
table
->
tab
[
table
->
def
],
0
,
(
sizeof
(
struct
gred_sched_data
)));
}
q
=
table
->
tab
[
table
->
def
];
q
->
DP
=
table
->
def
;
q
->
Wlog
=
ctl
->
Wlog
;
q
->
Plog
=
ctl
->
Plog
;
q
->
limit
=
ctl
->
limit
;
q
->
Scell_log
=
ctl
->
Scell_log
;
q
->
Rmask
=
ctl
->
Plog
<
32
?
((
1
<<
ctl
->
Plog
)
-
1
)
:
~
0UL
;
q
->
Scell_max
=
(
255
<<
q
->
Scell_log
);
q
->
qth_min
=
ctl
->
qth_min
<<
ctl
->
Wlog
;
q
->
qth_max
=
ctl
->
qth_max
<<
ctl
->
Wlog
;
if
(
table
->
grio
)
q
->
prio
=
table
->
tab
[
ctl
->
DP
]
->
prio
;
else
q
->
prio
=
8
;
q
->
qcount
=
-
1
;
PSCHED_SET_PASTPERFECT
(
q
->
qidlestart
);
memcpy
(
q
->
Stab
,
RTA_DATA
(
tb
[
TCA_GRED_STAB
-
1
]),
256
);
}
return
0
;
red_set_parms
(
&
q
->
parms
,
ctl
->
qth_min
,
ctl
->
qth_max
,
ctl
->
Wlog
,
ctl
->
Plog
,
ctl
->
Scell_log
,
stab
);
return
0
;
}
}
static
int
gred_
init
(
struct
Qdisc
*
sch
,
struct
rtattr
*
opt
)
static
int
gred_
change
(
struct
Qdisc
*
sch
,
struct
rtattr
*
opt
)
{
{
struct
gred_sched
*
table
=
qdisc_priv
(
sch
);
struct
gred_sched
*
table
=
qdisc_priv
(
sch
);
struct
tc_gred_sopt
*
sopt
;
struct
tc_gred_qopt
*
ctl
;
struct
rtattr
*
tb
[
TCA_GRED_STAB
];
struct
rtattr
*
tb
[
TCA_GRED_MAX
];
struct
rtattr
*
tb2
[
TCA_GRED_DPS
];
int
err
=
-
EINVAL
,
prio
=
GRED_DEF_PRIO
;
u8
*
stab
;
if
(
opt
==
NULL
||
rtattr_parse_nested
(
tb
,
TCA_GRED_
STAB
,
opt
))
if
(
opt
==
NULL
||
rtattr_parse_nested
(
tb
,
TCA_GRED_
MAX
,
opt
))
return
-
EINVAL
;
return
-
EINVAL
;
if
(
tb
[
TCA_GRED_PARMS
-
1
]
==
0
&&
tb
[
TCA_GRED_STAB
-
1
]
==
0
)
{
if
(
tb
[
TCA_GRED_PARMS
-
1
]
==
NULL
&&
tb
[
TCA_GRED_STAB
-
1
]
==
NULL
)
rtattr_parse_nested
(
tb2
,
TCA_GRED_DPS
,
opt
);
return
gred_change_table_def
(
sch
,
opt
);
if
(
tb
[
TCA_GRED_PARMS
-
1
]
==
NULL
||
RTA_PAYLOAD
(
tb
[
TCA_GRED_PARMS
-
1
])
<
sizeof
(
*
ctl
)
||
tb
[
TCA_GRED_STAB
-
1
]
==
NULL
||
RTA_PAYLOAD
(
tb
[
TCA_GRED_STAB
-
1
])
<
256
)
return
-
EINVAL
;
ctl
=
RTA_DATA
(
tb
[
TCA_GRED_PARMS
-
1
]);
stab
=
RTA_DATA
(
tb
[
TCA_GRED_STAB
-
1
]);
if
(
ctl
->
DP
>=
table
->
DPs
)
goto
errout
;
if
(
tb2
[
TCA_GRED_DPS
-
1
]
==
0
)
if
(
gred_rio_mode
(
table
))
{
return
-
EINVAL
;
if
(
ctl
->
prio
==
0
)
{
int
def_prio
=
GRED_DEF_PRIO
;
sopt
=
RTA_DATA
(
tb2
[
TCA_GRED_DPS
-
1
]);
if
(
table
->
tab
[
table
->
def
])
table
->
DPs
=
sopt
->
DPs
;
def_prio
=
table
->
tab
[
table
->
def
]
->
prio
;
table
->
def
=
sopt
->
def_DP
;
table
->
grio
=
sopt
->
grio
;
printk
(
KERN_DEBUG
"GRED: DP %u does not have a prio "
table
->
initd
=
0
;
"setting default to %d
\n
"
,
ctl
->
DP
,
def_prio
);
return
0
;
prio
=
def_prio
;
}
else
prio
=
ctl
->
prio
;
}
sch_tree_lock
(
sch
);
err
=
gred_change_vq
(
sch
,
ctl
->
DP
,
ctl
,
prio
,
stab
);
if
(
err
<
0
)
goto
errout_locked
;
if
(
gred_rio_mode
(
table
))
{
gred_disable_wred_mode
(
table
);
if
(
gred_wred_mode_check
(
sch
))
gred_enable_wred_mode
(
table
);
}
}
DPRINTK
(
"
\n
GRED_INIT error!
\n
"
);
err
=
0
;
return
-
EINVAL
;
errout_locked:
sch_tree_unlock
(
sch
);
errout:
return
err
;
}
}
static
int
gred_
dump
(
struct
Qdisc
*
sch
,
struct
sk_buff
*
skb
)
static
int
gred_
init
(
struct
Qdisc
*
sch
,
struct
rtattr
*
opt
)
{
{
unsigned
long
qave
;
struct
rtattr
*
tb
[
TCA_GRED_MAX
];
struct
rtattr
*
rta
;
struct
tc_gred_qopt
*
opt
=
NULL
;
struct
tc_gred_qopt
*
dst
;
struct
gred_sched
*
table
=
qdisc_priv
(
sch
);
struct
gred_sched_data
*
q
;
int
i
;
unsigned
char
*
b
=
skb
->
tail
;
rta
=
(
struct
rtattr
*
)
b
;
if
(
opt
==
NULL
||
rtattr_parse_nested
(
tb
,
TCA_GRED_MAX
,
opt
))
RTA_PUT
(
skb
,
TCA_OPTIONS
,
0
,
NULL
)
;
return
-
EINVAL
;
opt
=
kmalloc
(
sizeof
(
struct
tc_gred_qopt
)
*
MAX_DPs
,
GFP_KERNEL
);
if
(
tb
[
TCA_GRED_PARMS
-
1
]
||
tb
[
TCA_GRED_STAB
-
1
])
return
-
EINVAL
;
if
(
opt
==
NULL
)
{
return
gred_change_table_def
(
sch
,
tb
[
TCA_GRED_DPS
-
1
]);
DPRINTK
(
"gred_dump:failed to malloc for %Zd
\n
"
,
}
sizeof
(
struct
tc_gred_qopt
)
*
MAX_DPs
);
goto
rtattr_failure
;
}
memset
(
opt
,
0
,
(
sizeof
(
struct
tc_gred_qopt
))
*
table
->
DPs
);
static
int
gred_dump
(
struct
Qdisc
*
sch
,
struct
sk_buff
*
skb
)
{
struct
gred_sched
*
table
=
qdisc_priv
(
sch
);
struct
rtattr
*
parms
,
*
opts
=
NULL
;
int
i
;
struct
tc_gred_sopt
sopt
=
{
.
DPs
=
table
->
DPs
,
.
def_DP
=
table
->
def
,
.
grio
=
gred_rio_mode
(
table
),
.
flags
=
table
->
red_flags
,
};
if
(
!
table
->
initd
)
{
opts
=
RTA_NEST
(
skb
,
TCA_OPTIONS
);
DPRINTK
(
"NO GRED Queues setup!
\n
"
);
RTA_PUT
(
skb
,
TCA_GRED_DPS
,
sizeof
(
sopt
),
&
sopt
);
}
parms
=
RTA_NEST
(
skb
,
TCA_GRED_PARMS
);
for
(
i
=
0
;
i
<
MAX_DPs
;
i
++
)
{
struct
gred_sched_data
*
q
=
table
->
tab
[
i
];
struct
tc_gred_qopt
opt
;
for
(
i
=
0
;
i
<
MAX_DPs
;
i
++
)
{
memset
(
&
opt
,
0
,
sizeof
(
opt
));
dst
=
&
opt
[
i
];
q
=
table
->
tab
[
i
];
if
(
!
q
)
{
if
(
!
q
)
{
/* hack -- fix at some point with proper message
/* hack -- fix at some point with proper message
This is how we indicate to tc that there is no VQ
This is how we indicate to tc that there is no VQ
at this DP */
at this DP */
dst
->
DP
=
MAX_DPs
+
i
;
opt
.
DP
=
MAX_DPs
+
i
;
continue
;
goto
append_opt
;
}
}
dst
->
limit
=
q
->
limit
;
opt
.
limit
=
q
->
limit
;
dst
->
qth_min
=
q
->
qth_min
>>
q
->
Wlog
;
opt
.
DP
=
q
->
DP
;
dst
->
qth_max
=
q
->
qth_max
>>
q
->
Wlog
;
opt
.
backlog
=
q
->
backlog
;
dst
->
DP
=
q
->
DP
;
opt
.
prio
=
q
->
prio
;
dst
->
backlog
=
q
->
backlog
;
opt
.
qth_min
=
q
->
parms
.
qth_min
>>
q
->
parms
.
Wlog
;
if
(
q
->
qave
)
{
opt
.
qth_max
=
q
->
parms
.
qth_max
>>
q
->
parms
.
Wlog
;
if
(
table
->
eqp
&&
table
->
grio
)
{
opt
.
Wlog
=
q
->
parms
.
Wlog
;
q
->
qidlestart
=
table
->
tab
[
table
->
def
]
->
qidlestart
;
opt
.
Plog
=
q
->
parms
.
Plog
;
q
->
qave
=
table
->
tab
[
table
->
def
]
->
qave
;
opt
.
Scell_log
=
q
->
parms
.
Scell_log
;
}
opt
.
other
=
q
->
stats
.
other
;
if
(
!
PSCHED_IS_PASTPERFECT
(
q
->
qidlestart
))
{
opt
.
early
=
q
->
stats
.
prob_drop
;
long
idle
;
opt
.
forced
=
q
->
stats
.
forced_drop
;
psched_time_t
now
;
opt
.
pdrop
=
q
->
stats
.
pdrop
;
PSCHED_GET_TIME
(
now
);
opt
.
packets
=
q
->
packetsin
;
idle
=
PSCHED_TDIFF_SAFE
(
now
,
q
->
qidlestart
,
q
->
Scell_max
);
opt
.
bytesin
=
q
->
bytesin
;
qave
=
q
->
qave
>>
q
->
Stab
[(
idle
>>
q
->
Scell_log
)
&
0xFF
];
dst
->
qave
=
qave
>>
q
->
Wlog
;
if
(
gred_wred_mode
(
table
))
{
q
->
parms
.
qidlestart
=
}
else
{
table
->
tab
[
table
->
def
]
->
parms
.
qidlestart
;
dst
->
qave
=
q
->
qave
>>
q
->
Wlog
;
q
->
parms
.
qavg
=
table
->
tab
[
table
->
def
]
->
parms
.
qavg
;
}
}
else
{
dst
->
qave
=
0
;
}
}
opt
.
qave
=
red_calc_qavg
(
&
q
->
parms
,
q
->
parms
.
qavg
);
dst
->
Wlog
=
q
->
Wlog
;
dst
->
Plog
=
q
->
Plog
;
append_opt:
dst
->
Scell_log
=
q
->
Scell_log
;
RTA_APPEND
(
skb
,
sizeof
(
opt
),
&
opt
);
dst
->
other
=
q
->
other
;
dst
->
forced
=
q
->
forced
;
dst
->
early
=
q
->
early
;
dst
->
pdrop
=
q
->
pdrop
;
dst
->
prio
=
q
->
prio
;
dst
->
packets
=
q
->
packetsin
;
dst
->
bytesin
=
q
->
bytesin
;
}
}
RTA_PUT
(
skb
,
TCA_GRED_PARMS
,
sizeof
(
struct
tc_gred_qopt
)
*
MAX_DPs
,
opt
);
RTA_NEST_END
(
skb
,
parms
);
rta
->
rta_len
=
skb
->
tail
-
b
;
kfree
(
opt
);
return
RTA_NEST_END
(
skb
,
opts
);
return
skb
->
len
;
rtattr_failure:
rtattr_failure:
if
(
opt
)
return
RTA_NEST_CANCEL
(
skb
,
opts
);
kfree
(
opt
);
DPRINTK
(
"gred_dump: FAILURE!!!!
\n
"
);
/* also free the opt struct here */
skb_trim
(
skb
,
b
-
skb
->
data
);
return
-
1
;
}
}
static
void
gred_destroy
(
struct
Qdisc
*
sch
)
static
void
gred_destroy
(
struct
Qdisc
*
sch
)
...
@@ -594,15 +574,13 @@ static void gred_destroy(struct Qdisc *sch)
...
@@ -594,15 +574,13 @@ static void gred_destroy(struct Qdisc *sch)
struct
gred_sched
*
table
=
qdisc_priv
(
sch
);
struct
gred_sched
*
table
=
qdisc_priv
(
sch
);
int
i
;
int
i
;
for
(
i
=
0
;
i
<
table
->
DPs
;
i
++
)
{
for
(
i
=
0
;
i
<
table
->
DPs
;
i
++
)
{
if
(
table
->
tab
[
i
])
if
(
table
->
tab
[
i
])
kfree
(
table
->
tab
[
i
]);
gred_destroy_vq
(
table
->
tab
[
i
]);
}
}
}
}
static
struct
Qdisc_ops
gred_qdisc_ops
=
{
static
struct
Qdisc_ops
gred_qdisc_ops
=
{
.
next
=
NULL
,
.
cl_ops
=
NULL
,
.
id
=
"gred"
,
.
id
=
"gred"
,
.
priv_size
=
sizeof
(
struct
gred_sched
),
.
priv_size
=
sizeof
(
struct
gred_sched
),
.
enqueue
=
gred_enqueue
,
.
enqueue
=
gred_enqueue
,
...
@@ -621,10 +599,13 @@ static int __init gred_module_init(void)
...
@@ -621,10 +599,13 @@ static int __init gred_module_init(void)
{
{
return
register_qdisc
(
&
gred_qdisc_ops
);
return
register_qdisc
(
&
gred_qdisc_ops
);
}
}
static
void
__exit
gred_module_exit
(
void
)
static
void
__exit
gred_module_exit
(
void
)
{
{
unregister_qdisc
(
&
gred_qdisc_ops
);
unregister_qdisc
(
&
gred_qdisc_ops
);
}
}
module_init
(
gred_module_init
)
module_init
(
gred_module_init
)
module_exit
(
gred_module_exit
)
module_exit
(
gred_module_exit
)
MODULE_LICENSE
(
"GPL"
);
MODULE_LICENSE
(
"GPL"
);
net/sched/sch_red.c
View file @
2d43f112
...
@@ -9,76 +9,23 @@
...
@@ -9,76 +9,23 @@
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*
* Changes:
* Changes:
* J Hadi Salim
<hadi@nortel.com>
980914: computation fixes
* J Hadi Salim 980914: computation fixes
* Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
* Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
* J Hadi Salim
<hadi@nortelnetworks.com> 980816: ECN support
* J Hadi Salim
980816: ECN support
*/
*/
#include <linux/config.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/inet_ecn.h>
#include <net/
dsfiel
d.h>
#include <net/
re
d.h>
/* Random Early Detection (RED) algorithm.
/* Parameters, settable by user:
=======================================
Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.
This file codes a "divisionless" version of RED algorithm
as written down in Fig.17 of the paper.
Short description.
------------------
When a new packet arrives we calculate the average queue length:
avg = (1-W)*avg + W*current_queue_len,
W is the filter time constant (chosen as 2^(-Wlog)), it controls
the inertia of the algorithm. To allow larger bursts, W should be
decreased.
if (avg > th_max) -> packet marked (dropped).
if (avg < th_min) -> packet passes.
if (th_min < avg < th_max) we calculate probability:
Pb = max_P * (avg - th_min)/(th_max-th_min)
and mark (drop) packet with this probability.
Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
max_P should be small (not 1), usually 0.01..0.02 is good value.
max_P is chosen as a number, so that max_P/(th_max-th_min)
is a negative power of two in order arithmetics to contain
only shifts.
Parameters, settable by user:
-----------------------------
-----------------------------
limit - bytes (must be > qth_max + burst)
limit - bytes (must be > qth_max + burst)
...
@@ -89,243 +36,93 @@ Short description.
...
@@ -89,243 +36,93 @@ Short description.
arbitrarily high (well, less than ram size)
arbitrarily high (well, less than ram size)
Really, this limit will never be reached
Really, this limit will never be reached
if RED works correctly.
if RED works correctly.
qth_min - bytes (should be < qth_max/2)
qth_max - bytes (should be at least 2*qth_min and less limit)
Wlog - bits (<32) log(1/W).
Plog - bits (<32)
Plog is related to max_P by formula:
max_P = (qth_max-qth_min)/2^Plog;
F.e. if qth_max=128K and qth_min=32K, then Plog=22
corresponds to max_P=0.02
Scell_log
Stab
Lookup table for log((1-W)^(t/t_ave).
NOTES:
Upper bound on W.
-----------------
If you want to allow bursts of L packets of size S,
you should choose W:
L + 1 - th_min/S < (1-(1-W)^L)/W
th_min/S = 32 th_min/S = 4
log(W) L
-1 33
-2 35
-3 39
-4 46
-5 57
-6 75
-7 101
-8 135
-9 190
etc.
*/
*/
struct
red_sched_data
struct
red_sched_data
{
{
/* Parameters */
u32
limit
;
/* HARD maximal queue length */
u32
limit
;
/* HARD maximal queue length */
unsigned
char
flags
;
u32
qth_min
;
/* Min average length threshold: A scaled */
struct
red_parms
parms
;
u32
qth_max
;
/* Max average length threshold: A scaled */
struct
red_stats
stats
;
u32
Rmask
;
u32
Scell_max
;
unsigned
char
flags
;
char
Wlog
;
/* log(W) */
char
Plog
;
/* random number bits */
char
Scell_log
;
u8
Stab
[
256
];
/* Variables */
unsigned
long
qave
;
/* Average queue length: A scaled */
int
qcount
;
/* Packets since last random number generation */
u32
qR
;
/* Cached random number */
psched_time_t
qidlestart
;
/* Start of idle period */
struct
tc_red_xstats
st
;
};
};
static
in
t
red_ecn_mark
(
struct
sk_buff
*
skb
)
static
in
line
int
red_use_ecn
(
struct
red_sched_data
*
q
)
{
{
if
(
skb
->
nh
.
raw
+
20
>
skb
->
tail
)
return
q
->
flags
&
TC_RED_ECN
;
return
0
;
switch
(
skb
->
protocol
)
{
case
__constant_htons
(
ETH_P_IP
):
if
(
INET_ECN_is_not_ect
(
skb
->
nh
.
iph
->
tos
))
return
0
;
IP_ECN_set_ce
(
skb
->
nh
.
iph
);
return
1
;
case
__constant_htons
(
ETH_P_IPV6
):
if
(
INET_ECN_is_not_ect
(
ipv6_get_dsfield
(
skb
->
nh
.
ipv6h
)))
return
0
;
IP6_ECN_set_ce
(
skb
->
nh
.
ipv6h
);
return
1
;
default:
return
0
;
}
}
}
static
int
static
inline
int
red_use_harddrop
(
struct
red_sched_data
*
q
)
red_enqueue
(
struct
sk_buff
*
skb
,
struct
Qdisc
*
sch
)
{
return
q
->
flags
&
TC_RED_HARDDROP
;
}
static
int
red_enqueue
(
struct
sk_buff
*
skb
,
struct
Qdisc
*
sch
)
{
{
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
psched_time_t
now
;
q
->
parms
.
qavg
=
red_calc_qavg
(
&
q
->
parms
,
sch
->
qstats
.
backlog
)
;
if
(
!
PSCHED_IS_PASTPERFECT
(
q
->
qidlestart
))
{
if
(
red_is_idling
(
&
q
->
parms
))
long
us_idle
;
red_end_of_idle_period
(
&
q
->
parms
);
int
shift
;
PSCHED_GET_TIME
(
now
);
switch
(
red_action
(
&
q
->
parms
,
q
->
parms
.
qavg
))
{
us_idle
=
PSCHED_TDIFF_SAFE
(
now
,
q
->
qidlestart
,
q
->
Scell_max
);
case
RED_DONT_MARK
:
PSCHED_SET_PASTPERFECT
(
q
->
qidlestart
)
;
break
;
/*
case
RED_PROB_MARK
:
The problem: ideally, average length queue recalcultion should
sch
->
qstats
.
overlimits
++
;
be done over constant clock intervals. This is too expensive, so that
if
(
!
red_use_ecn
(
q
)
||
!
INET_ECN_set_ce
(
skb
))
{
the calculation is driven by outgoing packets.
q
->
stats
.
prob_drop
++
;
When the queue is idle we have to model this clock by hand.
goto
congestion_drop
;
}
SF+VJ proposed to "generate" m = idletime/(average_pkt_size/bandwidth)
dummy packets as a burst after idle time, i.e.
q->qave *= (1-W)^m
This is an apparently overcomplicated solution (f.e. we have to precompute
a table to make this calculation in reasonable time)
I believe that a simpler model may be used here,
but it is field for experiments.
*/
shift
=
q
->
Stab
[
us_idle
>>
q
->
Scell_log
];
if
(
shift
)
{
q
->
qave
>>=
shift
;
}
else
{
/* Approximate initial part of exponent
with linear function:
(1-W)^m ~= 1-mW + ...
Seems, it is the best solution to
problem of too coarce exponent tabulation.
*/
us_idle
=
(
q
->
qave
*
us_idle
)
>>
q
->
Scell_log
;
if
(
us_idle
<
q
->
qave
/
2
)
q
->
qave
-=
us_idle
;
else
q
->
qave
>>=
1
;
}
}
else
{
q
->
qave
+=
sch
->
qstats
.
backlog
-
(
q
->
qave
>>
q
->
Wlog
);
/* NOTE:
q->qave is fixed point number with point at Wlog.
The formulae above is equvalent to floating point
version:
qave = qave*(1-W) + sch->qstats.backlog*W;
--ANK (980924)
*/
}
if
(
q
->
qave
<
q
->
qth_min
)
{
q
->
stats
.
prob_mark
++
;
q
->
qcount
=
-
1
;
break
;
enqueue:
if
(
sch
->
qstats
.
backlog
+
skb
->
len
<=
q
->
limit
)
{
case
RED_HARD_MARK
:
__skb_queue_tail
(
&
sch
->
q
,
skb
);
sch
->
qstats
.
overlimits
++
;
sch
->
qstats
.
backlog
+=
skb
->
len
;
if
(
red_use_harddrop
(
q
)
||
!
red_use_ecn
(
q
)
||
sch
->
bstats
.
bytes
+=
skb
->
len
;
!
INET_ECN_set_ce
(
skb
))
{
sch
->
bstats
.
packets
++
;
q
->
stats
.
forced_drop
++
;
return
NET_XMIT_SUCCESS
;
goto
congestion_drop
;
}
else
{
}
q
->
st
.
pdrop
++
;
}
kfree_skb
(
skb
);
sch
->
qstats
.
drops
++
;
return
NET_XMIT_DROP
;
}
if
(
q
->
qave
>=
q
->
qth_max
)
{
q
->
qcount
=
-
1
;
sch
->
qstats
.
overlimits
++
;
mark:
if
(
!
(
q
->
flags
&
TC_RED_ECN
)
||
!
red_ecn_mark
(
skb
))
{
q
->
st
.
early
++
;
goto
drop
;
}
q
->
st
.
marked
++
;
goto
enqueue
;
}
if
(
++
q
->
qcount
)
{
q
->
stats
.
forced_mark
++
;
/* The formula used below causes questions.
break
;
OK. qR is random number in the interval 0..Rmask
i.e. 0..(2^Plog). If we used floating point
arithmetics, it would be: (2^Plog)*rnd_num,
where rnd_num is less 1.
Taking into account, that qave have fixed
point at Wlog, and Plog is related to max_P by
max_P = (qth_max-qth_min)/2^Plog; two lines
below have the following floating point equivalent:
max_P*(qave - qth_min)/(qth_max-qth_min) < rnd/qcount
Any questions? --ANK (980924)
*/
if
(((
q
->
qave
-
q
->
qth_min
)
>>
q
->
Wlog
)
*
q
->
qcount
<
q
->
qR
)
goto
enqueue
;
q
->
qcount
=
0
;
q
->
qR
=
net_random
()
&
q
->
Rmask
;
sch
->
qstats
.
overlimits
++
;
goto
mark
;
}
}
q
->
qR
=
net_random
()
&
q
->
Rmask
;
goto
enqueue
;
drop:
if
(
sch
->
qstats
.
backlog
+
skb
->
len
<=
q
->
limit
)
kfree_skb
(
skb
);
return
qdisc_enqueue_tail
(
skb
,
sch
);
sch
->
qstats
.
drops
++
;
q
->
stats
.
pdrop
++
;
return
qdisc_drop
(
skb
,
sch
);
congestion_drop:
qdisc_drop
(
skb
,
sch
);
return
NET_XMIT_CN
;
return
NET_XMIT_CN
;
}
}
static
int
static
int
red_requeue
(
struct
sk_buff
*
skb
,
struct
Qdisc
*
sch
)
red_requeue
(
struct
sk_buff
*
skb
,
struct
Qdisc
*
sch
)
{
{
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
PSCHED_SET_PASTPERFECT
(
q
->
qidlestart
);
if
(
red_is_idling
(
&
q
->
parms
))
red_end_of_idle_period
(
&
q
->
parms
);
__skb_queue_head
(
&
sch
->
q
,
skb
);
return
qdisc_requeue
(
skb
,
sch
);
sch
->
qstats
.
backlog
+=
skb
->
len
;
sch
->
qstats
.
requeues
++
;
return
0
;
}
}
static
struct
sk_buff
*
static
struct
sk_buff
*
red_dequeue
(
struct
Qdisc
*
sch
)
red_dequeue
(
struct
Qdisc
*
sch
)
{
{
struct
sk_buff
*
skb
;
struct
sk_buff
*
skb
;
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
skb
=
__skb_dequeue
(
&
sch
->
q
);
skb
=
qdisc_dequeue_head
(
sch
);
if
(
skb
)
{
sch
->
qstats
.
backlog
-=
skb
->
len
;
if
(
skb
==
NULL
&&
!
red_is_idling
(
&
q
->
parms
))
return
skb
;
red_start_of_idle_period
(
&
q
->
parms
);
}
PSCHED_GET_TIME
(
q
->
qidlestart
);
return
skb
;
return
NULL
;
}
}
static
unsigned
int
red_drop
(
struct
Qdisc
*
sch
)
static
unsigned
int
red_drop
(
struct
Qdisc
*
sch
)
...
@@ -333,16 +130,17 @@ static unsigned int red_drop(struct Qdisc* sch)
...
@@ -333,16 +130,17 @@ static unsigned int red_drop(struct Qdisc* sch)
struct
sk_buff
*
skb
;
struct
sk_buff
*
skb
;
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
skb
=
__skb_dequeue_tail
(
&
sch
->
q
);
skb
=
qdisc_dequeue_tail
(
sch
);
if
(
skb
)
{
if
(
skb
)
{
unsigned
int
len
=
skb
->
len
;
unsigned
int
len
=
skb
->
len
;
sch
->
qstats
.
backlog
-=
len
;
q
->
stats
.
other
++
;
sch
->
qstats
.
drops
++
;
qdisc_drop
(
skb
,
sch
);
q
->
st
.
other
++
;
kfree_skb
(
skb
);
return
len
;
return
len
;
}
}
PSCHED_GET_TIME
(
q
->
qidlestart
);
if
(
!
red_is_idling
(
&
q
->
parms
))
red_start_of_idle_period
(
&
q
->
parms
);
return
0
;
return
0
;
}
}
...
@@ -350,43 +148,38 @@ static void red_reset(struct Qdisc* sch)
...
@@ -350,43 +148,38 @@ static void red_reset(struct Qdisc* sch)
{
{
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
__skb_queue_purge
(
&
sch
->
q
);
qdisc_reset_queue
(
sch
);
sch
->
qstats
.
backlog
=
0
;
red_restart
(
&
q
->
parms
);
PSCHED_SET_PASTPERFECT
(
q
->
qidlestart
);
q
->
qave
=
0
;
q
->
qcount
=
-
1
;
}
}
static
int
red_change
(
struct
Qdisc
*
sch
,
struct
rtattr
*
opt
)
static
int
red_change
(
struct
Qdisc
*
sch
,
struct
rtattr
*
opt
)
{
{
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
struct
rtattr
*
tb
[
TCA_RED_
STAB
];
struct
rtattr
*
tb
[
TCA_RED_
MAX
];
struct
tc_red_qopt
*
ctl
;
struct
tc_red_qopt
*
ctl
;
if
(
opt
==
NULL
||
if
(
opt
==
NULL
||
rtattr_parse_nested
(
tb
,
TCA_RED_MAX
,
opt
))
rtattr_parse_nested
(
tb
,
TCA_RED_STAB
,
opt
)
||
return
-
EINVAL
;
tb
[
TCA_RED_PARMS
-
1
]
==
0
||
tb
[
TCA_RED_STAB
-
1
]
==
0
||
if
(
tb
[
TCA_RED_PARMS
-
1
]
==
NULL
||
RTA_PAYLOAD
(
tb
[
TCA_RED_PARMS
-
1
])
<
sizeof
(
*
ctl
)
||
RTA_PAYLOAD
(
tb
[
TCA_RED_PARMS
-
1
])
<
sizeof
(
*
ctl
)
||
RTA_PAYLOAD
(
tb
[
TCA_RED_STAB
-
1
])
<
256
)
tb
[
TCA_RED_STAB
-
1
]
==
NULL
||
RTA_PAYLOAD
(
tb
[
TCA_RED_STAB
-
1
])
<
RED_STAB_SIZE
)
return
-
EINVAL
;
return
-
EINVAL
;
ctl
=
RTA_DATA
(
tb
[
TCA_RED_PARMS
-
1
]);
ctl
=
RTA_DATA
(
tb
[
TCA_RED_PARMS
-
1
]);
sch_tree_lock
(
sch
);
sch_tree_lock
(
sch
);
q
->
flags
=
ctl
->
flags
;
q
->
flags
=
ctl
->
flags
;
q
->
Wlog
=
ctl
->
Wlog
;
q
->
Plog
=
ctl
->
Plog
;
q
->
Rmask
=
ctl
->
Plog
<
32
?
((
1
<<
ctl
->
Plog
)
-
1
)
:
~
0UL
;
q
->
Scell_log
=
ctl
->
Scell_log
;
q
->
Scell_max
=
(
255
<<
q
->
Scell_log
);
q
->
qth_min
=
ctl
->
qth_min
<<
ctl
->
Wlog
;
q
->
qth_max
=
ctl
->
qth_max
<<
ctl
->
Wlog
;
q
->
limit
=
ctl
->
limit
;
q
->
limit
=
ctl
->
limit
;
memcpy
(
q
->
Stab
,
RTA_DATA
(
tb
[
TCA_RED_STAB
-
1
]),
256
);
q
->
qcount
=
-
1
;
red_set_parms
(
&
q
->
parms
,
ctl
->
qth_min
,
ctl
->
qth_max
,
ctl
->
Wlog
,
ctl
->
Plog
,
ctl
->
Scell_log
,
RTA_DATA
(
tb
[
TCA_RED_STAB
-
1
]));
if
(
skb_queue_empty
(
&
sch
->
q
))
if
(
skb_queue_empty
(
&
sch
->
q
))
PSCHED_SET_PASTPERFECT
(
q
->
qidlestart
);
red_end_of_idle_period
(
&
q
->
parms
);
sch_tree_unlock
(
sch
);
sch_tree_unlock
(
sch
);
return
0
;
return
0
;
}
}
...
@@ -399,39 +192,39 @@ static int red_init(struct Qdisc* sch, struct rtattr *opt)
...
@@ -399,39 +192,39 @@ static int red_init(struct Qdisc* sch, struct rtattr *opt)
static
int
red_dump
(
struct
Qdisc
*
sch
,
struct
sk_buff
*
skb
)
static
int
red_dump
(
struct
Qdisc
*
sch
,
struct
sk_buff
*
skb
)
{
{
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
unsigned
char
*
b
=
skb
->
tail
;
struct
rtattr
*
opts
=
NULL
;
struct
rtattr
*
rta
;
struct
tc_red_qopt
opt
=
{
struct
tc_red_qopt
opt
;
.
limit
=
q
->
limit
,
.
flags
=
q
->
flags
,
rta
=
(
struct
rtattr
*
)
b
;
.
qth_min
=
q
->
parms
.
qth_min
>>
q
->
parms
.
Wlog
,
RTA_PUT
(
skb
,
TCA_OPTIONS
,
0
,
NULL
);
.
qth_max
=
q
->
parms
.
qth_max
>>
q
->
parms
.
Wlog
,
opt
.
limit
=
q
->
limit
;
.
Wlog
=
q
->
parms
.
Wlog
,
opt
.
qth_min
=
q
->
qth_min
>>
q
->
Wlog
;
.
Plog
=
q
->
parms
.
Plog
,
opt
.
qth_max
=
q
->
qth_max
>>
q
->
Wlog
;
.
Scell_log
=
q
->
parms
.
Scell_log
,
opt
.
Wlog
=
q
->
Wlog
;
};
opt
.
Plog
=
q
->
Plog
;
opt
.
Scell_log
=
q
->
Scell_log
;
opts
=
RTA_NEST
(
skb
,
TCA_OPTIONS
);
opt
.
flags
=
q
->
flags
;
RTA_PUT
(
skb
,
TCA_RED_PARMS
,
sizeof
(
opt
),
&
opt
);
RTA_PUT
(
skb
,
TCA_RED_PARMS
,
sizeof
(
opt
),
&
opt
);
rta
->
rta_len
=
skb
->
tail
-
b
;
return
RTA_NEST_END
(
skb
,
opts
);
return
skb
->
len
;
rtattr_failure:
rtattr_failure:
skb_trim
(
skb
,
b
-
skb
->
data
);
return
RTA_NEST_CANCEL
(
skb
,
opts
);
return
-
1
;
}
}
static
int
red_dump_stats
(
struct
Qdisc
*
sch
,
struct
gnet_dump
*
d
)
static
int
red_dump_stats
(
struct
Qdisc
*
sch
,
struct
gnet_dump
*
d
)
{
{
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
struct
red_sched_data
*
q
=
qdisc_priv
(
sch
);
struct
tc_red_xstats
st
=
{
return
gnet_stats_copy_app
(
d
,
&
q
->
st
,
sizeof
(
q
->
st
));
.
early
=
q
->
stats
.
prob_drop
+
q
->
stats
.
forced_drop
,
.
pdrop
=
q
->
stats
.
pdrop
,
.
other
=
q
->
stats
.
other
,
.
marked
=
q
->
stats
.
prob_mark
+
q
->
stats
.
forced_mark
,
};
return
gnet_stats_copy_app
(
d
,
&
st
,
sizeof
(
st
));
}
}
static
struct
Qdisc_ops
red_qdisc_ops
=
{
static
struct
Qdisc_ops
red_qdisc_ops
=
{
.
next
=
NULL
,
.
cl_ops
=
NULL
,
.
id
=
"red"
,
.
id
=
"red"
,
.
priv_size
=
sizeof
(
struct
red_sched_data
),
.
priv_size
=
sizeof
(
struct
red_sched_data
),
.
enqueue
=
red_enqueue
,
.
enqueue
=
red_enqueue
,
...
@@ -450,10 +243,13 @@ static int __init red_module_init(void)
...
@@ -450,10 +243,13 @@ static int __init red_module_init(void)
{
{
return
register_qdisc
(
&
red_qdisc_ops
);
return
register_qdisc
(
&
red_qdisc_ops
);
}
}
static
void
__exit
red_module_exit
(
void
)
static
void
__exit
red_module_exit
(
void
)
{
{
unregister_qdisc
(
&
red_qdisc_ops
);
unregister_qdisc
(
&
red_qdisc_ops
);
}
}
module_init
(
red_module_init
)
module_init
(
red_module_init
)
module_exit
(
red_module_exit
)
module_exit
(
red_module_exit
)
MODULE_LICENSE
(
"GPL"
);
MODULE_LICENSE
(
"GPL"
);
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment