Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
linux
Commits
2d43f112
Commit
2d43f112
authored
Nov 05, 2005
by
Arnaldo Carvalho de Melo
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'red' of 84.73.165.173:/home/tgr/repos/net-2.6
parents
6df71634
bdc450a0
Changes
5
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
891 additions
and
771 deletions
+891
-771
include/linux/pkt_sched.h
include/linux/pkt_sched.h
+24
-26
include/net/inet_ecn.h
include/net/inet_ecn.h
+24
-4
include/net/red.h
include/net/red.h
+325
-0
net/sched/sch_gred.c
net/sched/sch_gred.c
+411
-430
net/sched/sch_red.c
net/sched/sch_red.c
+107
-311
No files found.
include/linux/pkt_sched.h
View file @
2d43f112
...
@@ -93,6 +93,7 @@ struct tc_fifo_qopt
...
@@ -93,6 +93,7 @@ struct tc_fifo_qopt
/* PRIO section */
/* PRIO section */
#define TCQ_PRIO_BANDS 16
#define TCQ_PRIO_BANDS 16
#define TCQ_MIN_PRIO_BANDS 2
struct
tc_prio_qopt
struct
tc_prio_qopt
{
{
...
@@ -169,6 +170,7 @@ struct tc_red_qopt
...
@@ -169,6 +170,7 @@ struct tc_red_qopt
unsigned
char
Scell_log
;
/* cell size for idle damping */
unsigned
char
Scell_log
;
/* cell size for idle damping */
unsigned
char
flags
;
unsigned
char
flags
;
#define TC_RED_ECN 1
#define TC_RED_ECN 1
#define TC_RED_HARDDROP 2
};
};
struct
tc_red_xstats
struct
tc_red_xstats
...
@@ -194,15 +196,11 @@ enum
...
@@ -194,15 +196,11 @@ enum
#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
#define TCA_SET_OFF TCA_GRED_PARMS
struct
tc_gred_qopt
struct
tc_gred_qopt
{
{
__u32
limit
;
/* HARD maximal queue length (bytes)
__u32
limit
;
/* HARD maximal queue length (bytes) */
*/
__u32
qth_min
;
/* Min average length threshold (bytes) */
__u32
qth_min
;
/* Min average length threshold (bytes)
__u32
qth_max
;
/* Max average length threshold (bytes) */
*/
__u32
qth_max
;
/* Max average length threshold (bytes)
*/
__u32
DP
;
/* upto 2^32 DPs */
__u32
DP
;
/* upto 2^32 DPs */
__u32
backlog
;
__u32
backlog
;
__u32
qave
;
__u32
qave
;
...
@@ -210,22 +208,22 @@ struct tc_gred_qopt
...
@@ -210,22 +208,22 @@ struct tc_gred_qopt
__u32
early
;
__u32
early
;
__u32
other
;
__u32
other
;
__u32
pdrop
;
__u32
pdrop
;
__u8
Wlog
;
/* log(W) */
unsigned
char
Wlog
;
/* log(W) */
__u8
Plog
;
/* log(P_max/(qth_max-qth_min)) */
unsigned
char
Plog
;
/* log(P_max/(qth_max-qth_min)) */
__u8
Scell_log
;
/* cell size for idle damping */
unsigned
char
Scell_log
;
/* cell size for idle damping */
__u8
prio
;
/* prio of this VQ */
__u8
prio
;
/* prio of this VQ */
__u32
packets
;
__u32
packets
;
__u32
bytesin
;
__u32
bytesin
;
};
};
/* gred setup */
/* gred setup */
struct
tc_gred_sopt
struct
tc_gred_sopt
{
{
__u32
DPs
;
__u32
DPs
;
__u32
def_DP
;
__u32
def_DP
;
__u8
grio
;
__u8
grio
;
__u8
pad1
;
__u8
flags
;
__u16
pad2
;
__u16
pad1
;
};
};
/* HTB section */
/* HTB section */
...
...
include/net/inet_ecn.h
View file @
2d43f112
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
#define _INET_ECN_H_
#define _INET_ECN_H_
#include <linux/ip.h>
#include <linux/ip.h>
#include <linux/skbuff.h>
#include <net/dsfield.h>
#include <net/dsfield.h>
enum
{
enum
{
...
@@ -48,7 +49,7 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
...
@@ -48,7 +49,7 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
(label) |= __constant_htons(INET_ECN_ECT_0 << 4); \
(label) |= __constant_htons(INET_ECN_ECT_0 << 4); \
} while (0)
} while (0)
static
inline
void
IP_ECN_set_ce
(
struct
iphdr
*
iph
)
static
inline
int
IP_ECN_set_ce
(
struct
iphdr
*
iph
)
{
{
u32
check
=
iph
->
check
;
u32
check
=
iph
->
check
;
u32
ecn
=
(
iph
->
tos
+
1
)
&
INET_ECN_MASK
;
u32
ecn
=
(
iph
->
tos
+
1
)
&
INET_ECN_MASK
;
...
@@ -61,7 +62,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph)
...
@@ -61,7 +62,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph)
* INET_ECN_CE => 00
* INET_ECN_CE => 00
*/
*/
if
(
!
(
ecn
&
2
))
if
(
!
(
ecn
&
2
))
return
;
return
!
ecn
;
/*
/*
* The following gives us:
* The following gives us:
...
@@ -72,6 +73,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph)
...
@@ -72,6 +73,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph)
iph
->
check
=
check
+
(
check
>=
0xFFFF
);
iph
->
check
=
check
+
(
check
>=
0xFFFF
);
iph
->
tos
|=
INET_ECN_CE
;
iph
->
tos
|=
INET_ECN_CE
;
return
1
;
}
}
static
inline
void
IP_ECN_clear
(
struct
iphdr
*
iph
)
static
inline
void
IP_ECN_clear
(
struct
iphdr
*
iph
)
...
@@ -87,11 +89,12 @@ static inline void ipv4_copy_dscp(struct iphdr *outer, struct iphdr *inner)
...
@@ -87,11 +89,12 @@ static inline void ipv4_copy_dscp(struct iphdr *outer, struct iphdr *inner)
struct
ipv6hdr
;
struct
ipv6hdr
;
static
inline
void
IP6_ECN_set_ce
(
struct
ipv6hdr
*
iph
)
static
inline
int
IP6_ECN_set_ce
(
struct
ipv6hdr
*
iph
)
{
{
if
(
INET_ECN_is_not_ect
(
ipv6_get_dsfield
(
iph
)))
if
(
INET_ECN_is_not_ect
(
ipv6_get_dsfield
(
iph
)))
return
;
return
0
;
*
(
u32
*
)
iph
|=
htonl
(
INET_ECN_CE
<<
20
);
*
(
u32
*
)
iph
|=
htonl
(
INET_ECN_CE
<<
20
);
return
1
;
}
}
static
inline
void
IP6_ECN_clear
(
struct
ipv6hdr
*
iph
)
static
inline
void
IP6_ECN_clear
(
struct
ipv6hdr
*
iph
)
...
@@ -105,4 +108,21 @@ static inline void ipv6_copy_dscp(struct ipv6hdr *outer, struct ipv6hdr *inner)
...
@@ -105,4 +108,21 @@ static inline void ipv6_copy_dscp(struct ipv6hdr *outer, struct ipv6hdr *inner)
ipv6_change_dsfield
(
inner
,
INET_ECN_MASK
,
dscp
);
ipv6_change_dsfield
(
inner
,
INET_ECN_MASK
,
dscp
);
}
}
/*
 * INET_ECN_set_ce - set the ECN "Congestion Experienced" mark on a packet
 * @skb: packet whose network header should be marked
 *
 * Dispatches on skb->protocol to the IPv4 or IPv6 helper, but only after
 * checking that a complete network header fits between skb->nh.raw and
 * skb->tail.
 *
 * Returns the value of IP_ECN_set_ce()/IP6_ECN_set_ce() for IPv4/IPv6
 * packets, and 0 if the protocol is neither of those or the header would
 * extend past skb->tail.
 */
static inline int INET_ECN_set_ce(struct sk_buff *skb)
{
	if (skb->protocol == __constant_htons(ETH_P_IP)) {
		/* Only touch the header if all of it is present. */
		if (skb->nh.raw + sizeof(struct iphdr) <= skb->tail)
			return IP_ECN_set_ce(skb->nh.iph);
	} else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
		if (skb->nh.raw + sizeof(struct ipv6hdr) <= skb->tail)
			return IP6_ECN_set_ce(skb->nh.ipv6h);
	}

	return 0;
}
#endif
#endif
include/net/red.h
0 → 100644
View file @
2d43f112
#ifndef __NET_SCHED_RED_H
#define __NET_SCHED_RED_H
#include <linux/config.h>
#include <linux/types.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/dsfield.h>
/* Random Early Detection (RED) algorithm.
=======================================
Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.
This file codes a "divisionless" version of RED algorithm
as written down in Fig.17 of the paper.
Short description.
------------------
When a new packet arrives we calculate the average queue length:
avg = (1-W)*avg + W*current_queue_len,
W is the filter time constant (chosen as 2^(-Wlog)), it controls
the inertia of the algorithm. To allow larger bursts, W should be
decreased.
if (avg > th_max) -> packet marked (dropped).
if (avg < th_min) -> packet passes.
if (th_min < avg < th_max) we calculate probability:
Pb = max_P * (avg - th_min)/(th_max-th_min)
and mark (drop) packet with this probability.
Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
max_P should be small (not 1), usually 0.01..0.02 is good value.
max_P is chosen such that max_P/(th_max-th_min)
is a negative power of two, so that the arithmetic contains
only shifts.
Parameters, settable by user:
-----------------------------
qth_min - bytes (should be < qth_max/2)
qth_max - bytes (should be at least 2*qth_min and less than limit)
Wlog - bits (<32) log(1/W).
Plog - bits (<32)
Plog is related to max_P by formula:
max_P = (qth_max-qth_min)/2^Plog;
F.e. if qth_max=128K and qth_min=32K, then Plog=22
corresponds to max_P=0.02
Scell_log
Stab
Lookup table for log((1-W)^(t/t_ave)).
NOTES:
Upper bound on W.
-----------------
If you want to allow bursts of L packets of size S,
you should choose W:
L + 1 - th_min/S < (1-(1-W)^L)/W
th_min/S = 32 th_min/S = 4
log(W) L
-1 33
-2 35
-3 39
-4 46
-5 57
-6 75
-7 101
-8 135
-9 190
etc.
*/
/* Number of entries in the Stab[] lookup table of struct red_parms. */
#define RED_STAB_SIZE 256
/* Index mask for Stab[]; works because RED_STAB_SIZE is a power of two. */
#define RED_STAB_MASK (RED_STAB_SIZE - 1)
/* Counters describing what happened to packets handled by a RED queue. */
struct red_stats
{
	u32		prob_drop;	/* Packets dropped early by the probability test */
	u32		prob_mark;	/* Packets marked early by the probability test */
	u32		forced_drop;	/* Packets dropped because qavg > max_thresh */
	u32		forced_mark;	/* Packets marked because qavg > max_thresh */
	u32		pdrop;		/* Packets dropped because of queue limits */
	u32		other;		/* Packets dropped through drop() calls */
	u32		backlog;	/* NOTE(review): undocumented in the original; presumably the queue backlog - confirm */
};
/*
 * Configuration and running state of one RED instance.
 *
 * The configuration half is filled in by red_set_parms(); the state half
 * is updated by the red_*() helpers while packets are processed.
 */
struct red_parms
{
	/* Parameters */
	u32		qth_min;	/* Lower avg length threshold, scaled (shifted left by Wlog) */
	u32		qth_max;	/* Upper avg length threshold, scaled (shifted left by Wlog) */
	u32		Scell_max;	/* 255 << Scell_log, bound used when measuring idle time */
	u32		Rmask;		/* Cached mask for random numbers, see red_rmask() */
	u8		Scell_log;	/* Right shift mapping idle time to a Stab[] index */
	u8		Wlog;		/* log(W) */
	u8		Plog;		/* Number of random bits */
	u8		Stab[RED_STAB_SIZE];	/* Shift amounts used to decay qavg after idling */

	/* Variables */
	int		qcount;		/* Packets seen since the last random
					   number was generated */
	u32		qR;		/* Cached random number */

	unsigned long	qavg;		/* Average queue length, scaled (fixed point at Wlog) */
	psched_time_t	qidlestart;	/* When the current idle period began */
};
/*
 * red_rmask - compute the random-number mask for a given Plog
 * @Plog: number of random bits wanted
 *
 * Returns a mask with the low @Plog bits set, or all 32 bits set when
 * @Plog is 32 or larger.
 */
static inline u32 red_rmask(u8 Plog)
{
	/*
	 * Shift an unsigned 32-bit one: "1 << 31" on a plain (signed) int
	 * overflows, which is undefined behaviour in C.
	 */
	return Plog < 32 ? (((u32) 1 << Plog) - 1) : ~(u32) 0;
}
/*
 * red_set_parms - (re)configure a RED instance
 * @p:         instance to configure
 * @qth_min:   lower average-length threshold, unscaled
 * @qth_max:   upper average-length threshold, unscaled
 * @Wlog:      log(W); also the fixed-point scale applied to the thresholds
 * @Plog:      number of random bits, used to derive the random mask
 * @Scell_log: shift that maps idle time to an index into the Stab table
 * @stab:      table of RED_STAB_SIZE bytes copied into @p->Stab
 *
 * Both thresholds are stored shifted left by @Wlog. The running average
 * and the packet counter are reset as well.
 *
 * NOTE(review): @Wlog is used as a shift count on 32-bit values without
 * being range-checked here; callers presumably guarantee Wlog < 32 -
 * confirm.
 */
static inline void red_set_parms(struct red_parms *p,
				 u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
				 u8 Scell_log, u8 *stab)
{
	/* Reset average queue length, the value is strictly bound
	 * to the parameters below, resetting hurts a bit but leaving
	 * it might result in an unreasonable qavg for a while. --TGR
	 */
	p->qavg		= 0;

	p->qcount	= -1;
	p->qth_min	= qth_min << Wlog;
	p->qth_max	= qth_max << Wlog;
	p->Wlog		= Wlog;
	p->Plog		= Plog;
	p->Rmask	= red_rmask(Plog);
	p->Scell_log	= Scell_log;
	/*
	 * Unsigned constant: "255 << Scell_log" on a signed int overflows
	 * for Scell_log >= 24, which is undefined behaviour.
	 */
	p->Scell_max	= (255U << Scell_log);

	memcpy(p->Stab, stab, sizeof(p->Stab));
}
/*
 * red_is_idling - tell whether an idle period is currently being timed
 * @p: RED instance
 *
 * Returns 0 when p->qidlestart holds the "past perfect" marker (as tested
 * by PSCHED_IS_PASTPERFECT), and 1 otherwise.
 */
static inline int red_is_idling(struct red_parms *p)
{
	if (PSCHED_IS_PASTPERFECT(p->qidlestart))
		return 0;

	return 1;
}
/*
 * red_start_of_idle_period - note the start of an idle period
 * @p: RED instance
 *
 * Stores the current scheduler time in p->qidlestart.
 */
static inline void red_start_of_idle_period(struct red_parms *p)
{
	PSCHED_GET_TIME(p->qidlestart);
}
/*
 * red_end_of_idle_period - note that the idle period is over
 * @p: RED instance
 *
 * Resets p->qidlestart to the "past perfect" marker.
 */
static inline void red_end_of_idle_period(struct red_parms *p)
{
	PSCHED_SET_PASTPERFECT(p->qidlestart);
}
/*
 * red_restart - reset the running state of a RED instance
 * @p: RED instance
 *
 * Ends any idle period, zeroes the average queue length and sets the
 * packet counter to -1. The configured parameters are left untouched.
 */
static inline void red_restart(struct red_parms *p)
{
	red_end_of_idle_period(p);

	p->qavg   = 0;
	p->qcount = -1;
}
/*
 * red_calc_qavg_from_idle_time - decay the average after an idle period
 * @p: RED instance
 *
 * Returns the new (scaled) average queue length derived from p->qavg and
 * the time elapsed since p->qidlestart. p->qavg itself is not modified.
 */
static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
{
	psched_time_t now;
	long idle_us;
	long decay;
	int shift;

	PSCHED_GET_TIME(now);
	idle_us = PSCHED_TDIFF_SAFE(now, p->qidlestart, p->Scell_max);

	/*
	 * The problem: ideally, the average queue length recalculation
	 * should be done over constant clock intervals. This is too
	 * expensive, so the calculation is driven by outgoing packets.
	 * When the queue is idle we have to model this clock by hand.
	 *
	 * SF+VJ proposed to "generate":
	 *
	 *	m = idletime / (average_pkt_size / bandwidth)
	 *
	 * dummy packets as a burst after idle time, i.e.
	 *
	 *	p->qavg *= (1-W)^m
	 *
	 * This is an apparently overcomplicated solution (f.e. we have to
	 * precompute a table to make this calculation in reasonable time)
	 * I believe that a simpler model may be used here,
	 * but it is field for experiments.
	 */
	shift = p->Stab[(idle_us >> p->Scell_log) & RED_STAB_MASK];

	/* A non-zero table entry is applied directly as a right shift. */
	if (shift)
		return p->qavg >> shift;

	/* Approximate initial part of exponent with linear function:
	 *
	 *	(1-W)^m ~= 1-mW + ...
	 *
	 * Seems, it is the best solution to
	 * problem of too coarse exponent tabulation.
	 */
	decay = (p->qavg * idle_us) >> p->Scell_log;

	/* Never take away more than half of the average in this branch. */
	if (decay >= (p->qavg >> 1))
		return p->qavg >> 1;

	return p->qavg - decay;
}
/*
 * red_calc_qavg_no_idle_time - fold the current backlog into the average
 * @p:       RED instance
 * @backlog: current queue backlog
 *
 * Returns the new (scaled) average; p->qavg itself is not modified.
 */
static inline unsigned long red_calc_qavg_no_idle_time(struct red_parms *p,
						       unsigned int backlog)
{
	/*
	 * NOTE: p->qavg is a fixed point number with the point at Wlog.
	 * The expression below is equivalent to the floating point
	 * version:
	 *
	 *	qavg = qavg*(1-W) + backlog*W;
	 *
	 * --ANK (980924)
	 */
	unsigned long unscaled_avg = p->qavg >> p->Wlog;

	return p->qavg + (backlog - unscaled_avg);
}
/*
 * red_calc_qavg - compute the new average queue length
 * @p:       RED instance
 * @backlog: current queue backlog (ignored while idling)
 *
 * Uses the idle-time model when an idle period is being timed, and the
 * regular backlog-driven update otherwise. Returns the new (scaled)
 * average; p->qavg itself is not modified.
 */
static inline unsigned long red_calc_qavg(struct red_parms *p,
					  unsigned int backlog)
{
	if (red_is_idling(p))
		return red_calc_qavg_from_idle_time(p);

	return red_calc_qavg_no_idle_time(p, backlog);
}
/*
 * red_random - draw a random number limited by the cached mask
 * @p: RED instance
 *
 * Returns net_random() masked with p->Rmask.
 */
static inline u32 red_random(struct red_parms *p)
{
	return net_random() & p->Rmask;
}
/*
 * red_mark_probability - probabilistic mark decision between the thresholds
 * @p:    RED instance
 * @qavg: current (scaled) average queue length
 *
 * Returns 1 if the packet should be marked, 0 otherwise.
 */
static inline int red_mark_probability(struct red_parms *p, unsigned long qavg)
{
	/* The formula used below causes questions.

	   OK. qR is a random number in the interval 0..Rmask,
	   i.e. 0..(2^Plog). With floating point arithmetic
	   it would be (2^Plog)*rnd_num, where rnd_num is
	   less than 1.

	   Taking into account that qavg has its fixed point
	   at Wlog, and that Plog is related to max_P by
	   max_P = (qth_max-qth_min)/2^Plog, the test below
	   has the following floating point equivalent:

	   max_P*(qavg - qth_min)/(qth_max-qth_min) < rnd/qcount

	   Any questions? --ANK (980924)
	 */
	return ((qavg - p->qth_min) >> p->Wlog) * p->qcount >= p->qR;
}
/* Position of the average queue length, as reported by red_cmp_thresh(). */
enum {
	RED_BELOW_MIN_THRESH,	/* qavg <  qth_min */
	RED_BETWEEN_TRESH,	/* qth_min <= qavg < qth_max */
	RED_ABOVE_MAX_TRESH,	/* qavg >= qth_max */
};
/*
 * red_cmp_thresh - classify the average against the two thresholds
 * @p:    RED instance
 * @qavg: current (scaled) average queue length
 *
 * Returns RED_BELOW_MIN_THRESH if @qavg < p->qth_min,
 * RED_ABOVE_MAX_TRESH if @qavg >= p->qth_max, and
 * RED_BETWEEN_TRESH otherwise.
 */
static inline int red_cmp_thresh(struct red_parms *p, unsigned long qavg)
{
	if (qavg < p->qth_min)
		return RED_BELOW_MIN_THRESH;

	if (qavg >= p->qth_max)
		return RED_ABOVE_MAX_TRESH;

	return RED_BETWEEN_TRESH;
}
/* Verdicts returned by red_action(). */
enum {
	RED_DONT_MARK,	/* Leave the packet alone */
	RED_PROB_MARK,	/* Mark: the probability test fired between the thresholds */
	RED_HARD_MARK,	/* Mark: the average is at or above the max threshold */
};
/*
 * red_action - decide what to do with a packet given the current average
 * @p:    RED instance (qcount and qR are updated)
 * @qavg: current (scaled) average queue length
 *
 * Returns RED_DONT_MARK, RED_PROB_MARK or RED_HARD_MARK.
 */
static inline int red_action(struct red_parms *p, unsigned long qavg)
{
	switch (red_cmp_thresh(p, qavg)) {
	case RED_BELOW_MIN_THRESH:
		p->qcount = -1;
		return RED_DONT_MARK;

	case RED_BETWEEN_TRESH:
		p->qcount++;

		/*
		 * The counter has just come up from -1: only draw the
		 * random number used by the following packets.
		 */
		if (p->qcount == 0) {
			p->qR = red_random(p);
			return RED_DONT_MARK;
		}

		if (!red_mark_probability(p, qavg))
			return RED_DONT_MARK;

		/* Mark, then start a new round with a fresh random number. */
		p->qcount = 0;
		p->qR = red_random(p);
		return RED_PROB_MARK;

	case RED_ABOVE_MAX_TRESH:
		p->qcount = -1;
		return RED_HARD_MARK;
	}

	/* red_cmp_thresh() returns one of the three values handled above. */
	BUG();
	return RED_DONT_MARK;
}
#endif
net/sched/sch_gred.c
View file @
2d43f112
This diff is collapsed.
Click to expand it.
net/sched/sch_red.c
View file @
2d43f112
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment