Merge /tmp/iproute2
diff --git a/include/SNAPSHOT.h b/include/SNAPSHOT.h
index 63b857d..8f896b7 100644
--- a/include/SNAPSHOT.h
+++ b/include/SNAPSHOT.h
@@ -1 +1 @@
-static const char SNAPSHOT[] = "130716";
+static const char SNAPSHOT[] = "130903";
diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index 51da65b..2b82d7e 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -44,8 +44,8 @@
FRA_FWMARK, /* mark */
FRA_FLOW, /* flow/class id */
FRA_UNUSED6,
- FRA_UNUSED7,
- FRA_UNUSED8,
+ FRA_SUPPRESS_IFGROUP,
+ FRA_SUPPRESS_PREFIXLEN,
FRA_TABLE, /* Extended table id */
FRA_FWMASK, /* mask for netfilter mark */
FRA_OIFNAME,
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index d37e53c..d2de4e6 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -14,6 +14,7 @@
#define _LINUX_IF_BRIDGE_H
#include <linux/types.h>
+#include <linux/if_ether.h>
#define SYSFS_BRIDGE_ATTR "bridge"
#define SYSFS_BRIDGE_FDB "brforward"
@@ -88,7 +89,7 @@
};
struct __fdb_entry {
- __u8 mac_addr[6];
+ __u8 mac_addr[ETH_ALEN];
__u8 port_no;
__u8 is_local;
__u32 ageing_timer_value;
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index d07aeca..ee4f2ba 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -143,6 +143,7 @@
IFLA_NUM_TX_QUEUES,
IFLA_NUM_RX_QUEUES,
IFLA_CARRIER,
+ IFLA_PHYS_PORT_ID,
__IFLA_MAX
};
@@ -311,6 +312,8 @@
IFLA_VXLAN_L2MISS,
IFLA_VXLAN_L3MISS,
IFLA_VXLAN_PORT, /* destination port */
+ IFLA_VXLAN_GROUP6,
+ IFLA_VXLAN_LOCAL6,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index dc13de3..75cc8ac 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -56,6 +56,8 @@
#define TUNGETVNETHDRSZ _IOR('T', 215, int)
#define TUNSETVNETHDRSZ _IOW('T', 216, int)
#define TUNSETQUEUE _IOW('T', 217, int)
+#define TUNSETIFINDEX _IOW('T', 218, unsigned int)
+#define TUNGETFILTER _IOR('T', 219, struct sock_fprog)
/* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001
@@ -70,6 +72,10 @@
#define IFF_DETACH_QUEUE 0x0400
/* read-only flag */
#define IFF_PERSIST 0x0800
+#define IFF_NOFILTER 0x1000
+
+/* Socket options */
+#define TUN_TX_TIMESTAMP 1
/* Features for GSO (TUNSETOFFLOAD). */
#define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index dbd71b0..9b82913 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -73,9 +73,17 @@
#define TC_H_ROOT (0xFFFFFFFFU)
#define TC_H_INGRESS (0xFFFFFFF1U)
+/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */
+enum tc_link_layer {
+ TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */
+ TC_LINKLAYER_ETHERNET,
+ TC_LINKLAYER_ATM,
+};
+#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */
+
struct tc_ratespec {
unsigned char cell_log;
- unsigned char __reserved;
+ __u8 linklayer; /* lower 4 bits */
unsigned short overhead;
short cell_align;
unsigned short mpu;
@@ -736,4 +744,45 @@
};
};
+/* FQ */
+
+enum {
+ TCA_FQ_UNSPEC,
+
+ TCA_FQ_PLIMIT, /* limit of total number of packets in queue */
+
+ TCA_FQ_FLOW_PLIMIT, /* limit of packets per flow */
+
+ TCA_FQ_QUANTUM, /* RR quantum */
+
+ TCA_FQ_INITIAL_QUANTUM, /* RR quantum for new flow */
+
+ TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */
+
+ TCA_FQ_FLOW_DEFAULT_RATE,/* for sockets with unspecified sk_rate,
+ * use the following rate
+ */
+
+ TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */
+
+ TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */
+ __TCA_FQ_MAX
+};
+
+#define TCA_FQ_MAX (__TCA_FQ_MAX - 1)
+
+struct tc_fq_qd_stats {
+ __u64 gc_flows;
+ __u64 highprio_packets;
+ __u64 tcp_retrans;
+ __u64 throttled;
+ __u64 flows_plimit;
+ __u64 pkts_too_long;
+ __u64 allocation_errors;
+ __s64 time_next_delayed_flow;
+ __u32 flows;
+ __u32 inactive_flows;
+ __u32 throttled_flows;
+ __u32 pad;
+};
#endif
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 1eb04d3..8df6bd7 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -111,6 +111,7 @@
#define TCP_REPAIR_OPTIONS 22
#define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */
#define TCP_TIMESTAMP 24
+#define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */
struct tcp_repair_opt {
__u32 opt_code;
diff --git a/ip/iplink_macvlan.c b/ip/iplink_macvlan.c
index 5b4b868..ec51106 100644
--- a/ip/iplink_macvlan.c
+++ b/ip/iplink_macvlan.c
@@ -79,7 +79,7 @@
RTA_PAYLOAD(tb[IFLA_MACVLAN_MODE]) < sizeof(__u32))
return;
- mode = rta_getattr_u32(tb[IFLA_VLAN_ID]);
+ mode = rta_getattr_u32(tb[IFLA_MACVLAN_MODE]);
fprintf(f, " mode %s ",
mode == MACVLAN_MODE_PRIVATE ? "private"
: mode == MACVLAN_MODE_VEPA ? "vepa"
diff --git a/ip/ipmonitor.c b/ip/ipmonitor.c
index 4895390..70f2a7a 100644
--- a/ip/ipmonitor.c
+++ b/ip/ipmonitor.c
@@ -36,7 +36,6 @@
exit(-1);
}
-
static int accept_msg(const struct sockaddr_nl *who,
struct nlmsghdr *n, void *arg)
{
@@ -88,6 +87,13 @@
}
if (n->nlmsg_type == RTM_NEWNEIGH || n->nlmsg_type == RTM_DELNEIGH ||
n->nlmsg_type == RTM_GETNEIGH) {
+ if (preferred_family) {
+ struct ndmsg *r = NLMSG_DATA(n);
+
+ if (r->ndm_family != preferred_family)
+ return 0;
+ }
+
if (prefix_banner)
fprintf(fp, "[NEIGH]");
print_neigh(who, n, arg);
diff --git a/ip/ipnetns.c b/ip/ipnetns.c
index c8a4792..89dda3f 100644
--- a/ip/ipnetns.c
+++ b/ip/ipnetns.c
@@ -205,11 +205,15 @@
exit(1);
}
- /* If child failed, propagate status */
- if (WIFEXITED(status))
- exit(WEXITSTATUS(status));
+ if (WIFEXITED(status)) {
+ /* ip must return the status of the child,
+ * but do_cmd() will add a minus to this,
+ * so let's add another one here to cancel it.
+ */
+ return -WEXITSTATUS(status);
+ }
- return 0;
+ exit(1);
}
}
diff --git a/ip/iproute.c b/ip/iproute.c
index b069f1e..25a56d1 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -63,7 +63,7 @@
fprintf(stderr, " ip route restore\n");
fprintf(stderr, " ip route showdump\n");
fprintf(stderr, " ip route get ADDRESS [ from ADDRESS iif STRING ]\n");
- fprintf(stderr, " [ oif STRING ] [ tos TOS ]\n");
+ fprintf(stderr, " [ oif STRING ] [ tos TOS ]\n");
fprintf(stderr, " [ mark NUMBER ]\n");
fprintf(stderr, " ip route { add | del | change | append | replace } ROUTE\n");
fprintf(stderr, "SELECTOR := [ root PREFIX ] [ match PREFIX ] [ exact PREFIX ]\n");
@@ -76,7 +76,7 @@
fprintf(stderr, "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n");
fprintf(stderr, "NH := [ via ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n");
fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ]\n");
- fprintf(stderr, " [ rtt TIME ] [ rttvar TIME ] [reordering NUMBER ]\n");
+ fprintf(stderr, " [ rtt TIME ] [ rttvar TIME ] [ reordering NUMBER ]\n");
fprintf(stderr, " [ window NUMBER] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n");
fprintf(stderr, " [ ssthresh NUMBER ] [ realms REALM ] [ src ADDRESS ]\n");
fprintf(stderr, " [ rto_min TIME ] [ hoplimit NUMBER ] [ initrwnd NUMBER ]\n");
diff --git a/ip/iprule.c b/ip/iprule.c
index a5fcd43..d934f67 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -39,6 +39,9 @@
fprintf(stderr, " [ prohibit | reject | unreachable ]\n");
fprintf(stderr, " [ realms [SRCREALM/]DSTREALM ]\n");
fprintf(stderr, " [ goto NUMBER ]\n");
+ fprintf(stderr, " SUPPRESSOR\n");
+ fprintf(stderr, "SUPPRESSOR := [ suppress_prefixlength NUMBER ]\n");
+ fprintf(stderr, " [ suppress_ifgroup DEVGROUP ]\n");
fprintf(stderr, "TABLE_ID := [ local | main | default | NUMBER ]\n");
exit(-1);
}
@@ -153,9 +156,24 @@
}
table = rtm_get_table(r, tb);
- if (table)
+ if (table) {
fprintf(fp, "lookup %s ", rtnl_rttable_n2a(table, b1, sizeof(b1)));
+ if (tb[FRA_SUPPRESS_PREFIXLEN]) {
+ int pl = rta_getattr_u32(tb[FRA_SUPPRESS_PREFIXLEN]);
+ if (pl != -1) {
+ fprintf(fp, "suppress_prefixlength %d ", pl);
+ }
+ }
+ if (tb[FRA_SUPPRESS_IFGROUP]) {
+ int group = rta_getattr_u32(tb[FRA_SUPPRESS_IFGROUP]);
+ if (group != -1) {
+ SPRINT_BUF(b1);
+ fprintf(fp, "suppress_ifgroup %s ", rtnl_group_n2a(group, b1, sizeof(b1)));
+ }
+ }
+ }
+
if (tb[FRA_FLOW]) {
__u32 to = rta_getattr_u32(tb[FRA_FLOW]);
__u32 from = to>>16;
@@ -310,6 +328,20 @@
addattr32(&req.n, sizeof(req), FRA_TABLE, tid);
}
table_ok = 1;
+ } else if (matches(*argv, "suppress_prefixlength") == 0 ||
+ strcmp(*argv, "sup_pl") == 0) {
+ int pl;
+ NEXT_ARG();
+ if (get_s32(&pl, *argv, 0) || pl < 0)
+ invarg("suppress_prefixlength value is invalid\n", *argv);
+ addattr32(&req.n, sizeof(req), FRA_SUPPRESS_PREFIXLEN, pl);
+ } else if (matches(*argv, "suppress_ifgroup") == 0 ||
+ strcmp(*argv, "sup_group") == 0) {
+ NEXT_ARG();
+ int group;
+ if (rtnl_group_a2n(&group, *argv))
+ invarg("Invalid \"suppress_ifgroup\" value\n", *argv);
+ addattr32(&req.n, sizeof(req), FRA_SUPPRESS_IFGROUP, group);
} else if (strcmp(*argv, "dev") == 0 ||
strcmp(*argv, "iif") == 0) {
NEXT_ARG();
diff --git a/ip/iptunnel.c b/ip/iptunnel.c
index 43f8585..40186d3 100644
--- a/ip/iptunnel.c
+++ b/ip/iptunnel.c
@@ -280,7 +280,7 @@
return -1;
if (p.iph.ttl && p.iph.frag_off == 0) {
- fprintf(stderr, "ttl != 0 and noptmudisc are incompatible\n");
+ fprintf(stderr, "ttl != 0 and nopmtudisc are incompatible\n");
return -1;
}
diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c
index 0a3a9fb..411d9d5 100644
--- a/ip/ipxfrm.c
+++ b/ip/ipxfrm.c
@@ -856,7 +856,7 @@
if (flags)
fprintf(fp, "%x", flags);
}
- if (show_stats > 0 || tb[XFRMA_SA_EXTRA_FLAGS]) {
+ if (show_stats > 0 && tb[XFRMA_SA_EXTRA_FLAGS]) {
__u32 extra_flags = *(__u32 *)RTA_DATA(tb[XFRMA_SA_EXTRA_FLAGS]);
fprintf(fp, "extra_flag ");
diff --git a/ip/link_iptnl.c b/ip/link_iptnl.c
index 394254d..d5324f8 100644
--- a/ip/link_iptnl.c
+++ b/ip/link_iptnl.c
@@ -233,7 +233,7 @@
}
if (ttl && pmtudisc == 0) {
- fprintf(stderr, "ttl != 0 and noptmudisc are incompatible\n");
+ fprintf(stderr, "ttl != 0 and nopmtudisc are incompatible\n");
exit(-1);
}
diff --git a/man/man8/bridge.8 b/man/man8/bridge.8
index 66678b5..9a34804 100644
--- a/man/man8/bridge.8
+++ b/man/man8/bridge.8
@@ -13,7 +13,7 @@
.ti -8
.IR OBJECT " := { "
-.BR link " | " fdb " | " vlan " | " monitor " }"
+.BR link " | " fdb " | " mdb " | " vlan " | " monitor " }"
.sp
.ti -8
@@ -65,6 +65,21 @@
.IR DEV " ]"
.ti -8
+.BR "bridge mdb" " { " add " | " del " } "
+.B dev
+.IR DEV
+.B port
+.IR PORT
+.B grp
+.IR GROUP " [ "
+.BR permanent " | " temp " ]"
+
+.ti -8
+.BR "bridge mdb show " [ "
+.B dev
+.IR DEV " ]"
+
+.ti -8
.BR "bridge vlan" " { " add " | " del " } "
.B dev
.IR DEV
@@ -79,7 +94,7 @@
.IR DEV " ]"
.ti -8
-.BR "bridge monitor" " [ " all " | " neigh " | " link " ]"
+.BR "bridge monitor" " [ " all " | " neigh " | " link " | " mdb " ]"
.SH OPTIONS
@@ -110,6 +125,10 @@
- Forwarding Database entry.
.TP
+.B mdb
+- Multicast group database entry.
+
+.TP
.B vlan
- VLAN filter list.
@@ -326,6 +345,69 @@
option, the command becomes verbose. It prints out the last updated
and last used time for each entry.
+.SH bridge mdb - multicast group database management
+
+.B mdb
+objects contain known IP multicast group addresses on a link.
+
+.P
+The corresponding commands display mdb entries, add new entries,
+and delete old ones.
+
+.SS bridge mdb add - add a new multicast group database entry
+
+This command creates a new mdb entry.
+
+.TP
+.BI dev " DEV"
+the interface where this group address is associated.
+
+.TP
+.BI port " PORT"
+the port whose link is known to have members of this multicast group.
+
+.TP
+.BI grp " GROUP"
+the IP multicast group address whose members reside on the link connected to
+the port.
+
+.B permanent
+- the mdb entry is permanent
+.sp
+
+.B temp
+- the mdb entry is temporary (default)
+.sp
+
+.in -8
+.SS bridge mdb delete - delete a multicast group database entry
+This command removes an existing mdb entry.
+
+.PP
+The arguments are the same as with
+.BR "bridge mdb add" .
+
+.SS bridge mdb show - list multicast group database entries
+
+This command displays the current multicast group membership table. The table
+is populated by IGMP and MLD snooping in the bridge driver automatically. It
+can be altered by
+.B bridge mdb add
+and
+.B bridge mdb del
+commands manually too.
+
+.TP
+.BI dev " DEV"
+the interface only whose entries should be listed. Default is to list all
+bridge interfaces.
+
+.PP
+With the
+.B -details
+option, the command becomes verbose. It prints out the ports known to have
+a connected router.
+
.SH bridge vlan - VLAN filter list
.B vlan
@@ -395,7 +477,7 @@
.I OBJECT-LIST
is the list of object types that we want to monitor.
It may contain
-.BR link ", and " fdb "."
+.BR link ", " fdb ", and " mdb "."
If no
.B file
argument is given,
diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in
index 7191bce..79bc7f1 100644
--- a/man/man8/ip-route.8.in
+++ b/man/man8/ip-route.8.in
@@ -97,6 +97,8 @@
.IR TIME " ] [ "
.B rttvar
.IR TIME " ] [ "
+.B reordering
+.IR NUMBER " ] [ "
.B window
.IR NUMBER " ] [ "
.B cwnd
@@ -110,7 +112,7 @@
.B initcwnd
.IR NUMBER " ] [ "
.B initrwnd
-.IR NUMBER " ]"
+.IR NUMBER " ] [ "
.B quickack
.IR BOOL " ]"
diff --git a/man/man8/ip-rule.8 b/man/man8/ip-rule.8
index 36e46f1..62df3b0 100644
--- a/man/man8/ip-rule.8
+++ b/man/man8/ip-rule.8
@@ -43,6 +43,14 @@
.IR ADDRESS " ] [ "
.BR prohibit " | " reject " | " unreachable " ] [ " realms
.RI "[" SRCREALM "/]" DSTREALM " ]"
+.I SUPPRESSOR
+
+.ti -8
+.IR SUPPRESSOR " := [ "
+.B suppress_prefixlength
+.IR NUMBER " ] [ "
+.B suppress_ifgroup
+.IR GROUP " ]"
.ti -8
.IR TABLE_ID " := [ "
@@ -217,6 +225,15 @@
It is also possible to use lookup instead of table.
.TP
+.BI suppress_prefixlength " NUMBER"
+reject routing decisions that have a prefix length of NUMBER or less.
+
+.TP
+.BI suppress_ifgroup " GROUP"
+reject routing decisions that use a device belonging to the interface
+group GROUP.
+
+.TP
.BI realms " FROM/TO"
Realms to select if the rule matched and the routing table lookup
succeeded. Realm
diff --git a/tc/Makefile b/tc/Makefile
index f26e764..1eeabd8 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -50,6 +50,7 @@
TCMODULES += q_mqprio.o
TCMODULES += q_codel.o
TCMODULES += q_fq_codel.o
+TCMODULES += q_fq.o
ifeq ($(TC_CONFIG_IPSET), y)
ifeq ($(TC_CONFIG_XT), y)
diff --git a/tc/m_police.c b/tc/m_police.c
index 53cbefc..300287e 100644
--- a/tc/m_police.c
+++ b/tc/m_police.c
@@ -322,9 +322,11 @@
print_police(struct action_util *a, FILE *f, struct rtattr *arg)
{
SPRINT_BUF(b1);
+ SPRINT_BUF(b2);
struct tc_police *p;
struct rtattr *tb[TCA_POLICE_MAX+1];
unsigned buffer;
+ unsigned int linklayer;
if (arg == NULL)
return 0;
@@ -360,6 +362,9 @@
} else
fprintf(f, " ");
fprintf(f, "overhead %ub ", p->rate.overhead);
+ linklayer = (p->rate.linklayer & TC_LINKLAYER_MASK);
+ if (linklayer > TC_LINKLAYER_ETHERNET || show_details)
+ fprintf(f, "linklayer %s ", sprint_linklayer(linklayer, b2));
fprintf(f, "\nref %d bind %d\n",p->refcnt, p->bindcnt);
return 0;
diff --git a/tc/q_cbq.c b/tc/q_cbq.c
index 3c5e72c..d76600c 100644
--- a/tc/q_cbq.c
+++ b/tc/q_cbq.c
@@ -442,7 +442,9 @@
struct tc_cbq_wrropt *wrr = NULL;
struct tc_cbq_fopt *fopt = NULL;
struct tc_cbq_ovl *ovl = NULL;
+ unsigned int linklayer;
SPRINT_BUF(b1);
+ SPRINT_BUF(b2);
if (opt == NULL)
return 0;
@@ -486,6 +488,9 @@
char buf[64];
print_rate(buf, sizeof(buf), r->rate);
fprintf(f, "rate %s ", buf);
+ linklayer = (r->linklayer & TC_LINKLAYER_MASK);
+ if (linklayer > TC_LINKLAYER_ETHERNET || show_details)
+ fprintf(f, "linklayer %s ", sprint_linklayer(linklayer, b2));
if (show_details) {
fprintf(f, "cell %ub ", 1<<r->cell_log);
if (r->mpu)
diff --git a/tc/q_fq.c b/tc/q_fq.c
new file mode 100644
index 0000000..c1f658e
--- /dev/null
+++ b/tc/q_fq.c
@@ -0,0 +1,279 @@
+/*
+ * Fair Queue
+ *
+ * Copyright (C) 2013 Eric Dumazet <edumazet@google.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+static void explain(void)
+{
+ fprintf(stderr, "Usage: ... fq [ limit PACKETS ] [ flow_limit PACKETS ]\n");
+ fprintf(stderr, " [ quantum BYTES ] [ initial_quantum BYTES ]\n");
+ fprintf(stderr, " [ maxrate RATE ] [ buckets NUMBER ]\n");
+ fprintf(stderr, " [ [no]pacing ]\n");
+}
+
+static unsigned int ilog2(unsigned int val)
+{
+ unsigned int res = 0;
+
+ val--;
+ while (val) {
+ res++;
+ val >>= 1;
+ }
+ return res;
+}
+
+static int fq_parse_opt(struct qdisc_util *qu, int argc, char **argv,
+ struct nlmsghdr *n)
+{
+ unsigned int plimit = ~0U;
+ unsigned int flow_plimit = ~0U;
+ unsigned int quantum = ~0U;
+ unsigned int initial_quantum = ~0U;
+ unsigned int buckets = 0;
+ unsigned int maxrate = ~0U;
+ unsigned int defrate = ~0U;
+ int pacing = -1;
+ struct rtattr *tail;
+
+ while (argc > 0) {
+ if (strcmp(*argv, "limit") == 0) {
+ NEXT_ARG();
+ if (get_unsigned(&plimit, *argv, 0)) {
+ fprintf(stderr, "Illegal \"limit\"\n");
+ return -1;
+ }
+ } else if (strcmp(*argv, "flow_limit") == 0) {
+ NEXT_ARG();
+ if (get_unsigned(&flow_plimit, *argv, 0)) {
+ fprintf(stderr, "Illegal \"flow_limit\"\n");
+ return -1;
+ }
+ } else if (strcmp(*argv, "buckets") == 0) {
+ NEXT_ARG();
+ if (get_unsigned(&buckets, *argv, 0)) {
+ fprintf(stderr, "Illegal \"buckets\"\n");
+ return -1;
+ }
+ } else if (strcmp(*argv, "maxrate") == 0) {
+ NEXT_ARG();
+ if (get_rate(&maxrate, *argv)) {
+ fprintf(stderr, "Illegal \"maxrate\"\n");
+ return -1;
+ }
+ } else if (strcmp(*argv, "defrate") == 0) {
+ NEXT_ARG();
+ if (get_rate(&defrate, *argv)) {
+ fprintf(stderr, "Illegal \"defrate\"\n");
+ return -1;
+ }
+ } else if (strcmp(*argv, "quantum") == 0) {
+ NEXT_ARG();
+ if (get_unsigned(&quantum, *argv, 0)) {
+ fprintf(stderr, "Illegal \"quantum\"\n");
+ return -1;
+ }
+ } else if (strcmp(*argv, "initial_quantum") == 0) {
+ NEXT_ARG();
+ if (get_unsigned(&initial_quantum, *argv, 0)) {
+ fprintf(stderr, "Illegal \"initial_quantum\"\n");
+ return -1;
+ }
+ } else if (strcmp(*argv, "pacing") == 0) {
+ pacing = 1;
+ } else if (strcmp(*argv, "nopacing") == 0) {
+ pacing = 0;
+ } else if (strcmp(*argv, "help") == 0) {
+ explain();
+ return -1;
+ } else {
+ fprintf(stderr, "What is \"%s\"?\n", *argv);
+ explain();
+ return -1;
+ }
+ argc--; argv++;
+ }
+
+ tail = NLMSG_TAIL(n);
+ addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
+ if (buckets) {
+ unsigned int log = ilog2(buckets);
+
+ addattr_l(n, 1024, TCA_FQ_BUCKETS_LOG,
+ &log, sizeof(log));
+ }
+ if (plimit != ~0U)
+ addattr_l(n, 1024, TCA_FQ_PLIMIT,
+ &plimit, sizeof(plimit));
+ if (flow_plimit != ~0U)
+ addattr_l(n, 1024, TCA_FQ_FLOW_PLIMIT,
+ &flow_plimit, sizeof(flow_plimit));
+ if (quantum != ~0U)
+ addattr_l(n, 1024, TCA_FQ_QUANTUM, &quantum, sizeof(quantum));
+ if (initial_quantum != ~0U)
+ addattr_l(n, 1024, TCA_FQ_INITIAL_QUANTUM,
+ &initial_quantum, sizeof(initial_quantum));
+ if (pacing != -1)
+ addattr_l(n, 1024, TCA_FQ_RATE_ENABLE,
+ &pacing, sizeof(pacing));
+ if (maxrate != ~0U)
+ addattr_l(n, 1024, TCA_FQ_FLOW_MAX_RATE,
+ &maxrate, sizeof(maxrate));
+ if (defrate != ~0U)
+ addattr_l(n, 1024, TCA_FQ_FLOW_DEFAULT_RATE,
+ &defrate, sizeof(defrate));
+ tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
+ return 0;
+}
+
+static int fq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+ struct rtattr *tb[TCA_FQ_MAX + 1];
+ unsigned int plimit, flow_plimit;
+ unsigned int buckets_log;
+ int pacing;
+ unsigned int rate, quantum;
+ SPRINT_BUF(b1);
+
+ if (opt == NULL)
+ return 0;
+
+ parse_rtattr_nested(tb, TCA_FQ_MAX, opt);
+
+ if (tb[TCA_FQ_PLIMIT] &&
+ RTA_PAYLOAD(tb[TCA_FQ_PLIMIT]) >= sizeof(__u32)) {
+ plimit = rta_getattr_u32(tb[TCA_FQ_PLIMIT]);
+ fprintf(f, "limit %up ", plimit);
+ }
+ if (tb[TCA_FQ_FLOW_PLIMIT] &&
+ RTA_PAYLOAD(tb[TCA_FQ_FLOW_PLIMIT]) >= sizeof(__u32)) {
+ flow_plimit = rta_getattr_u32(tb[TCA_FQ_FLOW_PLIMIT]);
+ fprintf(f, "flow_limit %up ", flow_plimit);
+ }
+ if (tb[TCA_FQ_BUCKETS_LOG] &&
+ RTA_PAYLOAD(tb[TCA_FQ_BUCKETS_LOG]) >= sizeof(__u32)) {
+ buckets_log = rta_getattr_u32(tb[TCA_FQ_BUCKETS_LOG]);
+ fprintf(f, "buckets %u ", 1U << buckets_log);
+ }
+ if (tb[TCA_FQ_RATE_ENABLE] &&
+ RTA_PAYLOAD(tb[TCA_FQ_RATE_ENABLE]) >= sizeof(int)) {
+ pacing = rta_getattr_u32(tb[TCA_FQ_RATE_ENABLE]);
+ if (pacing == 0)
+ fprintf(f, "nopacing ");
+ }
+ if (tb[TCA_FQ_QUANTUM] &&
+ RTA_PAYLOAD(tb[TCA_FQ_QUANTUM]) >= sizeof(__u32)) {
+ quantum = rta_getattr_u32(tb[TCA_FQ_QUANTUM]);
+ fprintf(f, "quantum %u ", quantum);
+ }
+ if (tb[TCA_FQ_INITIAL_QUANTUM] &&
+ RTA_PAYLOAD(tb[TCA_FQ_INITIAL_QUANTUM]) >= sizeof(__u32)) {
+ quantum = rta_getattr_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
+ fprintf(f, "initial_quantum %u ", quantum);
+ }
+ if (tb[TCA_FQ_FLOW_MAX_RATE] &&
+ RTA_PAYLOAD(tb[TCA_FQ_FLOW_MAX_RATE]) >= sizeof(__u32)) {
+ rate = rta_getattr_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
+
+ if (rate != ~0U)
+ fprintf(f, "maxrate %s ", sprint_rate(rate, b1));
+ }
+ if (tb[TCA_FQ_FLOW_DEFAULT_RATE] &&
+ RTA_PAYLOAD(tb[TCA_FQ_FLOW_DEFAULT_RATE]) >= sizeof(__u32)) {
+ rate = rta_getattr_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]);
+
+ if (rate != 0)
+ fprintf(f, "defrate %s ", sprint_rate(rate, b1));
+ }
+
+ return 0;
+}
+
+static int fq_print_xstats(struct qdisc_util *qu, FILE *f,
+ struct rtattr *xstats)
+{
+ struct tc_fq_qd_stats *st;
+
+ if (xstats == NULL)
+ return 0;
+
+ if (RTA_PAYLOAD(xstats) < sizeof(*st))
+ return -1;
+
+ st = RTA_DATA(xstats);
+
+ fprintf(f, " %u flows (%u inactive, %u throttled)",
+ st->flows, st->inactive_flows, st->throttled_flows);
+
+ if (st->time_next_delayed_flow > 0)
+ fprintf(f, ", next packet delay %llu ns", st->time_next_delayed_flow);
+
+ fprintf(f, "\n %llu gc, %llu highprio",
+ st->gc_flows, st->highprio_packets);
+
+ if (st->tcp_retrans)
+ fprintf(f, ", %llu retrans", st->tcp_retrans);
+
+ fprintf(f, ", %llu throttled", st->throttled);
+
+ if (st->flows_plimit)
+ fprintf(f, ", %llu flows_plimit", st->flows_plimit);
+
+ if (st->pkts_too_long || st->allocation_errors)
+ fprintf(f, "\n %llu too long pkts, %llu alloc errors\n",
+ st->pkts_too_long, st->allocation_errors);
+
+ return 0;
+}
+
+struct qdisc_util fq_qdisc_util = {
+ .id = "fq",
+ .parse_qopt = fq_parse_opt,
+ .print_qopt = fq_print_opt,
+ .print_xstats = fq_print_xstats,
+};
diff --git a/tc/q_htb.c b/tc/q_htb.c
index 9321c0a..6737ddb 100644
--- a/tc/q_htb.c
+++ b/tc/q_htb.c
@@ -31,9 +31,11 @@
static void explain(void)
{
fprintf(stderr, "Usage: ... qdisc add ... htb [default N] [r2q N]\n"
+ " [direct_qlen P]\n"
" default minor id of class to which unclassified packets are sent {0}\n"
" r2q DRR quantums are computed as rate in Bps/r2q {10}\n"
" debug string of 16 numbers each 0-3 {0}\n\n"
+ " direct_qlen Limit of the direct queue {in packets}\n"
"... class add ... htb rate R1 [burst B1] [mpu B] [overhead O]\n"
" [prio P] [slot S] [pslot PS]\n"
" [ceil R2] [cburst B2] [mtu MTU] [quantum Q]\n"
@@ -108,6 +110,7 @@
unsigned mtu;
unsigned short mpu = 0;
unsigned short overhead = 0;
+ unsigned int direct_qlen = ~0U;
unsigned int linklayer = LINKLAYER_ETHERNET; /* Assume ethernet */
struct rtattr *tail;
@@ -125,6 +128,11 @@
if (get_u32(&mtu, *argv, 10)) {
explain1("mtu"); return -1;
}
+ } else if (matches(*argv, "direct_qlen") == 0) {
+ NEXT_ARG();
+ if (get_u32(&direct_qlen, *argv, 10)) {
+ explain1("direct_qlen"); return -1;
+ }
} else if (matches(*argv, "mpu") == 0) {
NEXT_ARG();
if (get_u16(&mpu, *argv, 10)) {
@@ -230,6 +238,9 @@
opt.cbuffer = tc_calc_xmittime(opt.ceil.rate, cbuffer);
tail = NLMSG_TAIL(n);
+ if (direct_qlen != ~0U)
+ addattr_l(n, 1024, TCA_HTB_DIRECT_QLEN,
+ &direct_qlen, sizeof(direct_qlen));
addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
addattr_l(n, 2024, TCA_HTB_PARMS, &opt, sizeof(opt));
addattr_l(n, 3024, TCA_HTB_RTAB, rtab, 1024);
@@ -240,18 +251,20 @@
static int htb_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
{
- struct rtattr *tb[TCA_HTB_RTAB+1];
+ struct rtattr *tb[TCA_HTB_MAX + 1];
struct tc_htb_opt *hopt;
struct tc_htb_glob *gopt;
double buffer,cbuffer;
+ unsigned int linklayer;
SPRINT_BUF(b1);
SPRINT_BUF(b2);
SPRINT_BUF(b3);
+ SPRINT_BUF(b4);
if (opt == NULL)
return 0;
- parse_rtattr_nested(tb, TCA_HTB_RTAB, opt);
+ parse_rtattr_nested(tb, TCA_HTB_MAX, opt);
if (tb[TCA_HTB_PARMS]) {
hopt = RTA_DATA(tb[TCA_HTB_PARMS]);
@@ -268,6 +281,9 @@
buffer = tc_calc_xmitsize(hopt->rate.rate, hopt->buffer);
fprintf(f, "ceil %s ", sprint_rate(hopt->ceil.rate, b1));
cbuffer = tc_calc_xmitsize(hopt->ceil.rate, hopt->cbuffer);
+ linklayer = (hopt->rate.linklayer & TC_LINKLAYER_MASK);
+ if (linklayer > TC_LINKLAYER_ETHERNET || show_details)
+ fprintf(f, "linklayer %s ", sprint_linklayer(linklayer, b4));
if (show_details) {
fprintf(f, "burst %s/%u mpu %s overhead %s ",
sprint_size(buffer, b1),
@@ -297,6 +313,12 @@
if (show_details)
fprintf(f," ver %d.%d",gopt->version >> 16,gopt->version & 0xffff);
}
+ if (tb[TCA_HTB_DIRECT_QLEN] &&
+ RTA_PAYLOAD(tb[TCA_HTB_DIRECT_QLEN]) >= sizeof(__u32)) {
+ __u32 direct_qlen = rta_getattr_u32(tb[TCA_HTB_DIRECT_QLEN]);
+
+ fprintf(f, " direct_qlen %u", direct_qlen);
+ }
return 0;
}
diff --git a/tc/q_tbf.c b/tc/q_tbf.c
index 72cfff6..34784a4 100644
--- a/tc/q_tbf.c
+++ b/tc/q_tbf.c
@@ -239,10 +239,12 @@
{
struct rtattr *tb[TCA_TBF_PTAB+1];
struct tc_tbf_qopt *qopt;
+ unsigned int linklayer;
double buffer, mtu;
double latency;
SPRINT_BUF(b1);
SPRINT_BUF(b2);
+ SPRINT_BUF(b3);
if (opt == NULL)
return 0;
@@ -294,6 +296,9 @@
if (qopt->rate.overhead) {
fprintf(f, "overhead %d", qopt->rate.overhead);
}
+ linklayer = (qopt->rate.linklayer & TC_LINKLAYER_MASK);
+ if (linklayer > TC_LINKLAYER_ETHERNET || show_details)
+ fprintf(f, "linklayer %s ", sprint_linklayer(linklayer, b3));
return 0;
}
diff --git a/tc/tc_core.c b/tc/tc_core.c
index 85b072e..a524337 100644
--- a/tc/tc_core.c
+++ b/tc/tc_core.c
@@ -102,6 +102,21 @@
}
}
+/* Notice, the rate table calculated here, have gotten replaced in the
+ * kernel and is no-longer used for lookups.
+ *
+ * This happened in kernel release v3.8 caused by kernel
+ * - commit 56b765b79 ("htb: improved accuracy at high rates").
+ * This change unfortunately caused breakage of tc overhead and
+ * linklayer parameters.
+ *
+ * Kernel overhead handling got fixed in kernel v3.10 by
+ * - commit 01cb71d2d47 (net_sched: restore "overhead xxx" handling)
+ *
+ * Kernel linklayer handling got fixed in kernel v3.11 by
+ * - commit 8a8e3d84b17 (net_sched: restore "linklayer atm" handling)
+ */
+
/*
rtab[pkt_len>>cell_log] = pkt_xmit_time
*/
@@ -131,6 +146,7 @@
r->cell_align=-1; // Due to the sz calc
r->cell_log=cell_log;
+ r->linklayer = (linklayer & TC_LINKLAYER_MASK);
return cell_log;
}
diff --git a/tc/tc_qdisc.c b/tc/tc_qdisc.c
index f3bf5b5..3002a56 100644
--- a/tc/tc_qdisc.c
+++ b/tc/tc_qdisc.c
@@ -137,15 +137,15 @@
if (est.ewma_log)
addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est));
- if (q) {
- if (!q->parse_qopt) {
- fprintf(stderr, "qdisc '%s' does not support option parsing\n", k);
- return -1;
- }
- if (q->parse_qopt(q, argc, argv, &req.n))
- return 1;
- } else {
- if (argc) {
+ if (argc) {
+ if (q) {
+ if (!q->parse_qopt) {
+ fprintf(stderr, "qdisc '%s' does not support option parsing\n", k);
+ return -1;
+ }
+ if (q->parse_qopt(q, argc, argv, &req.n))
+ return 1;
+ } else {
if (matches(*argv, "help") == 0)
usage();
diff --git a/tc/tc_util.c b/tc/tc_util.c
index 8114c97..be3ed07 100644
--- a/tc/tc_util.c
+++ b/tc/tc_util.c
@@ -171,20 +171,24 @@
return 0;
}
-void print_rate(char *buf, int len, __u32 rate)
+void print_rate(char *buf, int len, __u64 rate)
{
double tmp = (double)rate*8;
extern int use_iec;
if (use_iec) {
- if (tmp >= 1000.0*1024.0*1024.0)
+ if (tmp >= 1000.0*1024.0*1024.0*1024.0)
+ snprintf(buf, len, "%.0fGibit", tmp/(1024.0*1024.0*1024.0));
+ else if (tmp >= 1000.0*1024.0*1024.0)
snprintf(buf, len, "%.0fMibit", tmp/(1024.0*1024.0));
else if (tmp >= 1000.0*1024)
snprintf(buf, len, "%.0fKibit", tmp/1024);
else
snprintf(buf, len, "%.0fbit", tmp);
} else {
- if (tmp >= 1000.0*1000000.0)
+ if (tmp >= 1000.0*1000000000.0)
+ snprintf(buf, len, "%.0fGbit", tmp/1000000000.0);
+ else if (tmp >= 1000.0*1000000.0)
snprintf(buf, len, "%.0fMbit", tmp/1000000.0);
else if (tmp >= 1000.0 * 1000.0)
snprintf(buf, len, "%.0fKbit", tmp/1000.0);
@@ -193,7 +197,7 @@
}
}
-char * sprint_rate(__u32 rate, char *buf)
+char * sprint_rate(__u64 rate, char *buf)
{
print_rate(buf, SPRINT_BSIZE-1, rate);
return buf;
@@ -460,9 +464,19 @@
q.drops, q.overlimits, q.requeues);
}
- if (tbs[TCA_STATS_RATE_EST]) {
+ if (tbs[TCA_STATS_RATE_EST64]) {
+ struct gnet_stats_rate_est64 re = {0};
+
+ memcpy(&re, RTA_DATA(tbs[TCA_STATS_RATE_EST64]),
+ MIN(RTA_PAYLOAD(tbs[TCA_STATS_RATE_EST64]),
+ sizeof(re)));
+ fprintf(fp, "\n%srate %s %llupps ",
+ prefix, sprint_rate(re.bps, b1), re.pps);
+ } else if (tbs[TCA_STATS_RATE_EST]) {
struct gnet_stats_rate_est re = {0};
- memcpy(&re, RTA_DATA(tbs[TCA_STATS_RATE_EST]), MIN(RTA_PAYLOAD(tbs[TCA_STATS_RATE_EST]), sizeof(re)));
+
+ memcpy(&re, RTA_DATA(tbs[TCA_STATS_RATE_EST]),
+ MIN(RTA_PAYLOAD(tbs[TCA_STATS_RATE_EST]), sizeof(re)));
fprintf(fp, "\n%srate %s %upps ",
prefix, sprint_rate(re.bps, b1), re.pps);
}
diff --git a/tc/tc_util.h b/tc/tc_util.h
index 4f54436..7c3709f 100644
--- a/tc/tc_util.h
+++ b/tc/tc_util.h
@@ -63,12 +63,12 @@
extern int get_time(unsigned *time, const char *str);
extern int get_linklayer(unsigned *val, const char *arg);
-extern void print_rate(char *buf, int len, __u32 rate);
+extern void print_rate(char *buf, int len, __u64 rate);
extern void print_size(char *buf, int len, __u32 size);
extern void print_qdisc_handle(char *buf, int len, __u32 h);
extern void print_time(char *buf, int len, __u32 time);
extern void print_linklayer(char *buf, int len, unsigned linklayer);
-extern char * sprint_rate(__u32 rate, char *buf);
+extern char * sprint_rate(__u64 rate, char *buf);
extern char * sprint_size(__u32 size, char *buf);
extern char * sprint_qdisc_handle(__u32 h, char *buf);
extern char * sprint_tc_classid(__u32 h, char *buf);