blob: 1ef25e6ee1f9084bd1dcf6df43766fd8d56fcada [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Jarek Poplawski25bfcd52008-08-18 20:53:34 -070030#include <linux/lockdep.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020032#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110033#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070034#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <net/pkt_sched.h>
36
Linus Torvalds1da177e2005-04-16 15:20:36 -070037static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
38 struct Qdisc *old, struct Qdisc *new);
39static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
40 struct Qdisc *q, unsigned long cl, int event);
41
42/*
43
44 Short review.
45 -------------
46
47 This file consists of two interrelated parts:
48
49 1. queueing disciplines manager frontend.
50 2. traffic classes manager frontend.
51
52 Generally, queueing discipline ("qdisc") is a black box,
53 which is able to enqueue packets and to dequeue them (when
54 device is ready to send something) in order and at times
55 determined by algorithm hidden in it.
56
   qdiscs are divided into two categories:
58 - "queues", which have no internal structure visible from outside.
59 - "schedulers", which split all the packets to "traffic classes",
60 using "packet classifiers" (look at cls_api.c)
61
62 In turn, classes may have child qdiscs (as rule, queues)
63 attached to them etc. etc. etc.
64
65 The goal of the routines in this file is to translate
66 information supplied by user in the form of handles
67 to more intelligible for kernel form, to make some sanity
68 checks and part of work, which is common to all qdiscs
69 and to provide rtnetlink notifications.
70
71 All real intelligent work is done inside qdisc modules.
72
73
74
75 Every discipline has two major routines: enqueue and dequeue.
76
77 ---dequeue
78
79 dequeue usually returns a skb to send. It is allowed to return NULL,
80 but it does not mean that queue is empty, it just means that
81 discipline does not want to send anything this time.
82 Queue is really empty if q->q.qlen == 0.
83 For complicated disciplines with multiple queues q->q is not
84 real packet queue, but however q->q.qlen must be valid.
85
86 ---enqueue
87
88 enqueue returns 0, if packet was enqueued successfully.
89 If packet (this one or another one) was dropped, it returns
90 not zero error code.
91 NET_XMIT_DROP - this packet dropped
92 Expected action: do not backoff, but wait until queue will clear.
93 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
94 Expected action: backoff or ignore
95 NET_XMIT_POLICED - dropped by police.
96 Expected action: backoff or error to real-time apps.
97
98 Auxiliary routines:
99
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700100 ---peek
101
102 like dequeue but without removing a packet from the queue
103
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104 ---reset
105
106 returns qdisc to initial state: purge all buffers, clear all
107 timers, counters (except for statistics) etc.
108
109 ---init
110
111 initializes newly created qdisc.
112
113 ---destroy
114
115 destroys resources allocated by init and during lifetime of qdisc.
116
117 ---change
118
119 changes qdisc parameters.
120 */
121
122/* Protects list of registered TC modules. It is pure SMP lock. */
123static DEFINE_RWLOCK(qdisc_mod_lock);
124
125
126/************************************************
127 * Queueing disciplines manipulation. *
128 ************************************************/
129
130
131/* The list of all installed queueing disciplines. */
132
133static struct Qdisc_ops *qdisc_base;
134
/* Register/unregister queueing discipline */
136
137int register_qdisc(struct Qdisc_ops *qops)
138{
139 struct Qdisc_ops *q, **qp;
140 int rc = -EEXIST;
141
142 write_lock(&qdisc_mod_lock);
143 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
144 if (!strcmp(qops->id, q->id))
145 goto out;
146
147 if (qops->enqueue == NULL)
148 qops->enqueue = noop_qdisc_ops.enqueue;
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700149 if (qops->peek == NULL) {
150 if (qops->dequeue == NULL) {
151 qops->peek = noop_qdisc_ops.peek;
152 } else {
153 rc = -EINVAL;
154 goto out;
155 }
156 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157 if (qops->dequeue == NULL)
158 qops->dequeue = noop_qdisc_ops.dequeue;
159
160 qops->next = NULL;
161 *qp = qops;
162 rc = 0;
163out:
164 write_unlock(&qdisc_mod_lock);
165 return rc;
166}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800167EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168
169int unregister_qdisc(struct Qdisc_ops *qops)
170{
171 struct Qdisc_ops *q, **qp;
172 int err = -ENOENT;
173
174 write_lock(&qdisc_mod_lock);
175 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
176 if (q == qops)
177 break;
178 if (q) {
179 *qp = q->next;
180 q->next = NULL;
181 err = 0;
182 }
183 write_unlock(&qdisc_mod_lock);
184 return err;
185}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800186EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187
188/* We know handle. Find qdisc among all qdisc's attached to device
189 (root qdisc, all its children, children of children etc.)
190 */
191
/* Search @root and every qdisc linked on its ->list for @handle.
 * Returns the matching qdisc, or NULL if none is found.  Built-in
 * (TCQ_F_BUILTIN) roots are never matched by handle.
 */
struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	list_for_each_entry(q, &root->list, list) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}
206
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700207/*
208 * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen()
209 * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue()
210 */
211static DEFINE_SPINLOCK(qdisc_list_lock);
212
/* Link @q onto its sleeping root's list so qdisc_lookup() can find it.
 * Root qdiscs and ingress qdiscs are not listed: they are reached
 * directly through the device queues.
 */
static void qdisc_list_add(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		/* qdisc_list_lock (not just the RTNL) guards the list;
		 * see the comment above its definition. */
		spin_lock_bh(&qdisc_list_lock);
		list_add_tail(&q->list, &qdisc_root_sleeping(q)->list);
		spin_unlock_bh(&qdisc_list_lock);
	}
}
221
/* Unlink @q from its root's qdisc list; counterpart of qdisc_list_add().
 * Roots and ingress qdiscs were never listed, so they are skipped.
 */
void qdisc_list_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		spin_lock_bh(&qdisc_list_lock);
		list_del(&q->list);
		spin_unlock_bh(&qdisc_list_lock);
	}
}
EXPORT_SYMBOL(qdisc_list_del);
231
/* Find the qdisc with @handle on @dev, searching each TX queue's
 * sleeping root (and its listed children) and finally the RX
 * (ingress) queue.  Returns NULL when no qdisc matches.
 */
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	unsigned int i;
	struct Qdisc *q;

	/* Hold qdisc_list_lock across the whole walk so entries cannot
	 * be removed under us; see the comment above the lock. */
	spin_lock_bh(&qdisc_list_lock);

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		struct Qdisc *txq_root = txq->qdisc_sleeping;

		q = qdisc_match_from_root(txq_root, handle);
		if (q)
			goto unlock;
	}

	q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);

unlock:
	spin_unlock_bh(&qdisc_list_lock);

	return q;
}
255
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
257{
258 unsigned long cl;
259 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800260 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261
262 if (cops == NULL)
263 return NULL;
264 cl = cops->get(p, classid);
265
266 if (cl == 0)
267 return NULL;
268 leaf = cops->leaf(p, cl);
269 cops->put(p, cl);
270 return leaf;
271}
272
273/* Find queueing discipline by name */
274
Patrick McHardy1e904742008-01-22 22:11:17 -0800275static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276{
277 struct Qdisc_ops *q = NULL;
278
279 if (kind) {
280 read_lock(&qdisc_mod_lock);
281 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800282 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 if (!try_module_get(q->owner))
284 q = NULL;
285 break;
286 }
287 }
288 read_unlock(&qdisc_mod_lock);
289 }
290 return q;
291}
292
293static struct qdisc_rate_table *qdisc_rtab_list;
294
Patrick McHardy1e904742008-01-22 22:11:17 -0800295struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296{
297 struct qdisc_rate_table *rtab;
298
299 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
300 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
301 rtab->refcnt++;
302 return rtab;
303 }
304 }
305
Patrick McHardy5feb5e12008-01-23 20:35:19 -0800306 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
307 nla_len(tab) != TC_RTAB_SIZE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308 return NULL;
309
310 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
311 if (rtab) {
312 rtab->rate = *r;
313 rtab->refcnt = 1;
Patrick McHardy1e904742008-01-22 22:11:17 -0800314 memcpy(rtab->data, nla_data(tab), 1024);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 rtab->next = qdisc_rtab_list;
316 qdisc_rtab_list = rtab;
317 }
318 return rtab;
319}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800320EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321
322void qdisc_put_rtab(struct qdisc_rate_table *tab)
323{
324 struct qdisc_rate_table *rtab, **rtabp;
325
326 if (!tab || --tab->refcnt)
327 return;
328
329 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
330 if (rtab == tab) {
331 *rtabp = rtab->next;
332 kfree(rtab);
333 return;
334 }
335 }
336}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800337EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338
/* Shared size tables, refcounted and guarded by qdisc_stab_lock. */
static LIST_HEAD(qdisc_stab_list);
static DEFINE_SPINLOCK(qdisc_stab_lock);

/* Netlink policy for the nested TCA_STAB attribute. */
static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};
346
347static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
348{
349 struct nlattr *tb[TCA_STAB_MAX + 1];
350 struct qdisc_size_table *stab;
351 struct tc_sizespec *s;
352 unsigned int tsize = 0;
353 u16 *tab = NULL;
354 int err;
355
356 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
357 if (err < 0)
358 return ERR_PTR(err);
359 if (!tb[TCA_STAB_BASE])
360 return ERR_PTR(-EINVAL);
361
362 s = nla_data(tb[TCA_STAB_BASE]);
363
364 if (s->tsize > 0) {
365 if (!tb[TCA_STAB_DATA])
366 return ERR_PTR(-EINVAL);
367 tab = nla_data(tb[TCA_STAB_DATA]);
368 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
369 }
370
371 if (!s || tsize != s->tsize || (!tab && tsize > 0))
372 return ERR_PTR(-EINVAL);
373
David S. Millerf3b96052008-08-18 22:33:05 -0700374 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700375
376 list_for_each_entry(stab, &qdisc_stab_list, list) {
377 if (memcmp(&stab->szopts, s, sizeof(*s)))
378 continue;
379 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
380 continue;
381 stab->refcnt++;
David S. Millerf3b96052008-08-18 22:33:05 -0700382 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700383 return stab;
384 }
385
David S. Millerf3b96052008-08-18 22:33:05 -0700386 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700387
388 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
389 if (!stab)
390 return ERR_PTR(-ENOMEM);
391
392 stab->refcnt = 1;
393 stab->szopts = *s;
394 if (tsize > 0)
395 memcpy(stab->data, tab, tsize * sizeof(u16));
396
David S. Millerf3b96052008-08-18 22:33:05 -0700397 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700398 list_add_tail(&stab->list, &qdisc_stab_list);
David S. Millerf3b96052008-08-18 22:33:05 -0700399 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700400
401 return stab;
402}
403
404void qdisc_put_stab(struct qdisc_size_table *tab)
405{
406 if (!tab)
407 return;
408
David S. Millerf3b96052008-08-18 22:33:05 -0700409 spin_lock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700410
411 if (--tab->refcnt == 0) {
412 list_del(&tab->list);
413 kfree(tab);
414 }
415
David S. Millerf3b96052008-08-18 22:33:05 -0700416 spin_unlock(&qdisc_stab_lock);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700417}
418EXPORT_SYMBOL(qdisc_put_stab);
419
/* Emit a nested TCA_STAB attribute for @stab (base parameters only;
 * the data array is not dumped).  Returns skb->len on success, or -1
 * when the skb runs out of room (NLA_PUT jumps to nla_put_failure).
 */
static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}
435
/* Compute the size-table adjusted length of @skb and store it in the
 * skb's qdisc cb.  The raw length plus configured overhead is mapped
 * through stab->data (indexed after cell_align/cell_log adjustment);
 * slots beyond the table are linearly extrapolated from its tail.
 * The final value is scaled by size_log and clamped to >= 1.
 */
void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;	/* empty table: only overhead applies */

	/* cell_align shifts, cell_log scales the lookup index. */
	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		/* Out of range: extrapolate from the last entry. */
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(qdisc_calculate_pkt_len);
463
/* hrtimer callback: clear the throttled flag and reschedule the root
 * qdisc so the watchdog's owner gets another chance to dequeue.
 */
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
	smp_wmb();	/* flag clear must be visible before rescheduling */
	__netif_schedule(qdisc_root(wd->qdisc));

	return HRTIMER_NORESTART;
}
475
/* Prepare watchdog @wd for @qdisc: monotonic, absolute-time hrtimer
 * firing qdisc_watchdog() above.
 */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);
483
/* Mark the qdisc throttled and arm the watchdog to fire at @expires
 * (psched time, converted to ns).  A deactivated root is left alone so
 * the timer cannot re-arm during device shutdown.
 */
void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
{
	ktime_t time;

	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	wd->qdisc->flags |= TCQ_F_THROTTLED;
	time = ktime_set(0, 0);
	time = ktime_add_ns(time, PSCHED_US2NS(expires));
	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule);
498
/* Cancel a pending watchdog timer and clear the throttled flag. */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505
Adrian Bunka94f7792008-07-22 14:20:11 -0700506static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700507{
508 unsigned int size = n * sizeof(struct hlist_head), i;
509 struct hlist_head *h;
510
511 if (size <= PAGE_SIZE)
512 h = kmalloc(size, GFP_KERNEL);
513 else
514 h = (struct hlist_head *)
515 __get_free_pages(GFP_KERNEL, get_order(size));
516
517 if (h != NULL) {
518 for (i = 0; i < n; i++)
519 INIT_HLIST_HEAD(&h[i]);
520 }
521 return h;
522}
523
524static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
525{
526 unsigned int size = n * sizeof(struct hlist_head);
527
528 if (size <= PAGE_SIZE)
529 kfree(h);
530 else
531 free_pages((unsigned long)h, get_order(size));
532}
533
/* Double @clhash when its load factor exceeds 0.75, rehashing every
 * entry into the new table under the qdisc tree lock.  Allocation
 * failure is tolerated silently: the old, denser table stays in use.
 */
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *n, *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	/* Allocate outside the tree lock; bail quietly on failure. */
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	qdisc_class_hash_free(ohash, osize);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
569
570int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
571{
572 unsigned int size = 4;
573
574 clhash->hash = qdisc_class_hash_alloc(size);
575 if (clhash->hash == NULL)
576 return -ENOMEM;
577 clhash->hashsize = size;
578 clhash->hashmask = size - 1;
579 clhash->hashelems = 0;
580 return 0;
581}
582EXPORT_SYMBOL(qdisc_class_hash_init);
583
/* Free the bucket array of @clhash (entries are not touched). */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
589
/* Insert @cl into @clhash, bucketed by its classid. */
void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);
601
/* Remove @cl from @clhash and update the element count. */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
609
/* Allocate an unique handle from space managed by kernel.
 * Probes handles starting from 0x8000:0000 in major steps of 0x10000,
 * wrapping before TC_H_ROOT, until an unused one is found on @dev.
 * Returns 0 when the bounded search is exhausted.
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x10000;	/* bound the search to 0x10000 probes */
	/* static: allocation position persists across calls and devices */
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
	} while (qdisc_lookup(dev, autohandle) && --i > 0);

	return i>0 ? autohandle : 0;
}
625
/* Attach toplevel qdisc to device queue.
 * Installs @qdisc (or noop when NULL) as the queue's sleeping qdisc
 * under the old root's qdisc lock, resets the old one if this caller
 * holds the last reference, and parks the active pointer on noop
 * (a later dev_activate() swaps the sleeping qdisc in).
 * Returns the previous sleeping qdisc for the caller to dispose of.
 */
static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
				     struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
	spinlock_t *root_lock;

	root_lock = qdisc_lock(oqdisc);
	spin_lock_bh(root_lock);

	/* Prune old scheduler */
	if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
		qdisc_reset(oqdisc);

	/* ... and graft new one */
	if (qdisc == NULL)
		qdisc = &noop_qdisc;
	dev_queue->qdisc_sleeping = qdisc;
	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);

	spin_unlock_bh(root_lock);

	return oqdisc;
}
651
/* Propagate a queue-length decrease of @n packets from @sch up through
 * all of its ancestors, giving classful parents a ->qlen_notify()
 * callback so they can deactivate now-empty classes.  Stops at the
 * root, or immediately for ingress parents (not part of the tx tree).
 */
void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;

	if (n == 0)
		return;
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			return;

		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON(parentid != TC_H_ROOT);
			return;
		}
		cops = sch->ops->cl_ops;
		/* NOTE(review): cops is dereferenced unconditionally --
		 * assumes any qdisc that has children is classful; verify. */
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
	}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679
/* Send a netlink notification for a graft and destroy the replaced
 * qdisc, if any.
 */
static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(skb, n, clid, old, new);

	if (old)
		qdisc_destroy(old);
}
689
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	int err = 0;

	if (parent == NULL) {
		/* Device-level graft: all TX queues, or just the RX
		 * queue when either side is an ingress qdisc. */
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
		}

		/* Quiesce the device around the swap. */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = &dev->rx_queue;

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* one extra reference per queue beyond the first */
			if (new && i > 0)
				atomic_inc(&new->refcnt);

			notify_and_destroy(skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		/* Graft into a class of a classful parent qdisc. */
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EINVAL;

		if (cops) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			}
		}
		if (!err)
			notify_and_destroy(skb, n, classid, old, new);
	}
	return err;
}
752
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700753/* lockdep annotation is needed for ingress; egress gets it only for name */
754static struct lock_class_key qdisc_tx_lock;
755static struct lock_class_key qdisc_rx_lock;
756
/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.

   Looks up the ops by TCA_KIND (autoloading the sch_<kind> module if
   configured), allocates the qdisc, assigns or generates its handle,
   applies the optional size table and rate estimator, and finally
   links it into the lookup list.  On failure *errp is set and NULL
   is returned; -EAGAIN asks the caller to replay after a module load.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
	     u32 parent, u32 handle, struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		/* separate lockdep class for ingress locks */
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out3;
			}
			sch->stab = stab;
		}
		if (tca[TCA_RATE]) {
			spinlock_t *root_lock;

			/* estimator lock: sleeping-root lock for tree
			 * members, own lock for roots and ingress */
			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS))
				root_lock = qdisc_root_sleeping_lock(sch);
			else
				root_lock = qdisc_lock(sch);

			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
						root_lock, tca[TCA_RATE]);
			if (err) {
				/*
				 * Any broken qdiscs that would require
				 * a ops->reset() here? The qdisc was never
				 * in action so it shouldn't be necessary.
				 */
				if (ops->destroy)
					ops->destroy(sch);
				goto err_out3;
			}
		}

		qdisc_list_add(sch);

		return sch;
	}
err_out3:
	qdisc_put_stab(sch->stab);
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;
}
876
/* Apply TCA_OPTIONS / TCA_STAB / TCA_RATE changes to an existing
 * qdisc.  Fails with -EINVAL when options are supplied but the qdisc
 * has no ->change hook; the size table is only swapped after the
 * options change succeeded.
 */
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	/* Drop the old stab (possibly NULL) and install the new one
	 * (NULL when no TCA_STAB was given). */
	qdisc_put_stab(sch->stab);
	sch->stab = stab;

	if (tca[TCA_RATE])
		gen_replace_estimator(&sch->bstats, &sch->rate_est,
				      qdisc_root_sleeping_lock(sch),
				      tca[TCA_RATE]);
	return 0;
}
905
/*
 * Walker state for loop detection when grafting a qdisc: records the
 * candidate parent and the current recursion depth while the class
 * hierarchy below a qdisc is walked.
 */
struct check_loop_arg
{
	struct qdisc_walker	w;	/* must be first: cast target in check_loop_fn */
	struct Qdisc		*p;	/* qdisc we must not encounter below q */
	int			depth;	/* recursion depth, bounded in check_loop_fn */
};
912
913static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
914
915static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
916{
917 struct check_loop_arg arg;
918
919 if (q->ops->cl_ops == NULL)
920 return 0;
921
922 arg.w.stop = arg.w.skip = arg.w.count = 0;
923 arg.w.fn = check_loop_fn;
924 arg.depth = depth;
925 arg.p = p;
926 q->ops->cl_ops->walk(q, &arg.w);
927 return arg.w.stop ? -ELOOP : 0;
928}
929
930static int
931check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
932{
933 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800934 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935 struct check_loop_arg *arg = (struct check_loop_arg *)w;
936
937 leaf = cops->leaf(q, cl);
938 if (leaf) {
939 if (leaf == arg->p || arg->depth > 7)
940 return -ELOOP;
941 return check_loop(leaf, arg->p, arg->depth + 1);
942 }
943 return 0;
944}
945
946/*
947 * Delete/get qdisc.
948 */
949
/*
 * RTM_DELQDISC / RTM_GETQDISC handler: locate the addressed qdisc by
 * parent classid and/or handle, then either delete it (graft NULL in
 * its place) or just report it back via qdisc_notify().
 */
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	/* Packet scheduling is not yet namespace-aware. */
	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				/* Resolve the parent, then the child below it. */
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->rx_queue.qdisc_sleeping;
			}
		} else {
			/* TC_H_ROOT addresses the device's root (egress) qdisc. */
			struct netdev_queue *dev_queue;
			dev_queue = netdev_get_tx_queue(dev, 0);
			q = dev_queue->qdisc_sleeping;
		}
		if (!q)
			return -ENOENT;

		/* If a handle was also given, it must match the one we found. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		/* No parent given: look the qdisc up by handle alone. */
		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
			return -ENOENT;
	}

	/* An explicit TCA_KIND must agree with the qdisc's actual type. */
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		/* Anonymous (handle 0) qdiscs cannot be deleted explicitly. */
		if (q->handle == 0)
			return -ENOENT;
		/* Deleting == grafting NULL in its place; notifies on success. */
		if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
			return err;
	} else {
		qdisc_notify(skb, n, clid, NULL, q);
	}
	return 0;
}
1010
1011/*
1012 Create/change qdisc.
1013 */
1014
/*
 * RTM_NEWQDISC handler: create a new qdisc, replace an existing one, or
 * change one in place, depending on the netlink flags and on whether a
 * qdisc with the addressed handle already exists.  May restart from
 * "replay" when qdisc_create() returns -EAGAIN (e.g. after module load).
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	/* Packet scheduling is not yet namespace-aware. */
	if (net != &init_net)
		return -EINVAL;

replay:
	/* Reinit, just in case something touches this. */
	tcm = NLMSG_DATA(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				/* Resolve parent p, then existing child q below it. */
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /*ingress */
				q = dev->rx_queue.qdisc_sleeping;
			}
		} else {
			struct netdev_queue *dev_queue;
			dev_queue = netdev_get_tx_queue(dev, 0);
			q = dev_queue->qdisc_sleeping;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				/* A handle was supplied that differs from the
				 * currently attached child.
				 */
				if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
					goto create_n_graft;
				if (n->nlmsg_flags&NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				/* Grafting q under p must not create a cycle. */
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (q == NULL)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know, that some child q is already
				 * attached to this parent and have choice:
				 * either to change it or to create/graft new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, requestor wanted to say,
				 * that qdisc tcm_handle is not expected
				 * to exist, so that we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is sort of hole in API, we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft, if
				 * user gave KIND, which does not match existing.
				 */
				if ((n->nlmsg_flags&NLM_F_CREATE) &&
				    (n->nlmsg_flags&NLM_F_REPLACE) &&
				    ((n->nlmsg_flags&NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags&NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags&NLM_F_CREATE))
		return -ENOENT;
	/* Ingress qdiscs hang off the rx queue; everything else off tx queue 0. */
	if (clid == TC_H_INGRESS)
		q = qdisc_create(dev, &dev->rx_queue,
				 tcm->tcm_parent, tcm->tcm_parent,
				 tca, &err);
	else
		q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	if (q == NULL) {
		/* -EAGAIN: retry from scratch (e.g. after module autoload). */
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}
1153
/*
 * Serialize one qdisc into a netlink message of type @event.
 * Returns skb->len on success, -1 when the skb ran out of room
 * (the partially written message is trimmed away).
 *
 * Note: NLMSG_NEW and NLA_PUT_STRING jump to the nlmsg_failure /
 * nla_put_failure labels on overflow.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	/* tcm_info carries the refcount in qdisc dumps. */
	tcm->tcm_info = atomic_read(&q->refcnt);
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	q->qstats.qlen = q->q.qlen;

	if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	/* Patch the final message length now that all attributes are in. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1202
1203static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1204 u32 clid, struct Qdisc *old, struct Qdisc *new)
1205{
1206 struct sk_buff *skb;
1207 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1208
1209 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1210 if (!skb)
1211 return -ENOBUFS;
1212
1213 if (old && old->handle) {
1214 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
1215 goto err_out;
1216 }
1217 if (new) {
1218 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1219 goto err_out;
1220 }
1221
1222 if (skb->len)
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08001223 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224
1225err_out:
1226 kfree_skb(skb);
1227 return -EINVAL;
1228}
1229
David S. Miller30723672008-07-18 22:50:15 -07001230static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1231{
1232 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1233}
1234
/*
 * Dump the root qdisc and every qdisc linked on its ->list into @skb.
 * @q_idx_p / @s_q_idx implement netlink dump resume: qdiscs with index
 * below s_q_idx were sent in a previous pass and are skipped.
 * Returns 0 when done, -1 when the skb filled up (caller resumes later).
 */
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;

	if (!root)
		return 0;

	/* The root itself is index 0 of this hierarchy. */
	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}
	/* Then every qdisc registered below this root. */
	list_for_each_entry(q, &root->list, list) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	/* Hand the position back so the next dump call can resume. */
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}
1274
/*
 * RTM_GETQDISC dump handler: walk every net device and emit its egress
 * and ingress qdisc hierarchies.  cb->args[0]/args[1] store the device
 * and per-device qdisc index so an interrupted dump can resume.
 */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;

	/* Packet scheduling is not yet namespace-aware. */
	if (net != &init_net)
		return 0;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];
	read_lock(&dev_base_lock);
	idx = 0;
	for_each_netdev(&init_net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		/* Past the resume device: start its qdisc list from 0. */
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		dev_queue = netdev_get_tx_queue(dev, 0);
		if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

		dev_queue = &dev->rx_queue;
		if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

cont:
		idx++;
	}

done:
	read_unlock(&dev_base_lock);

	/* Save the resume position for the next invocation. */
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
1318
1319
1320
1321/************************************************
1322 * Traffic classes manipulation. *
1323 ************************************************/
1324
1325
1326
/*
 * RTM_{NEW,DEL,GET}TCLASS handler: resolve the owning qdisc from the
 * (possibly partial) parent/handle pair, locate or create the class,
 * and dispatch the requested operation through the qdisc's class ops.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);
	int err;

	/* Packet scheduling is not yet namespace-aware. */
	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	dev_queue = netdev_get_tx_queue(dev, 0);
	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			/* Neither side named a major: default to the root qdisc. */
			qid = dev_queue->qdisc_sleeping->handle;

		/* Now qid is genuine qdisc handle consistent
		   both with parent and child.

		   TC_H_MAJ(pid) still may be unspecified, complete it now.
		 */
		if (pid)
			pid = TC_H_MAKE(qid, pid);
	} else {
		if (qid == 0)
			qid = dev_queue->qdisc_sleeping->handle;
	}

	/* OK. Locate qdisc */
	if ((q = qdisc_lookup(dev, qid)) == NULL)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (pid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		/* Class does not exist: only NEW+CREATE may proceed. */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			/* EXCL forbids touching an existing class. */
			err = -EEXIST;
			if (n->nlmsg_flags&NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	/* Create or reconfigure via the qdisc's change hook. */
	new_cl = cl;
	err = cops->change(q, clid, pid, tca, &new_cl);
	if (err == 0)
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	/* Drop the reference taken by cops->get() above. */
	if (cl)
		cops->put(q, cl);

	return err;
}
1448
1449
/*
 * Serialize one traffic class of @q into a netlink message of type
 * @event.  Returns skb->len on success, -1 on skb overflow (the partial
 * message is trimmed).  NLMSG_NEW / NLA_PUT_STRING jump to the failure
 * labels when the skb runs out of room.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	/* Class-specific attributes (may rewrite tcm_parent/tcm_handle). */
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	/* Patch the final message length now that all attributes are in. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1489
1490static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1491 struct Qdisc *q, unsigned long cl, int event)
1492{
1493 struct sk_buff *skb;
1494 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1495
1496 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1497 if (!skb)
1498 return -ENOBUFS;
1499
1500 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1501 kfree_skb(skb);
1502 return -EINVAL;
1503 }
1504
Denis V. Lunev97c53ca2007-11-19 22:26:51 -08001505 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506}
1507
/*
 * Walker state for class dumps: carries the destination skb and the
 * netlink dump callback through the qdisc's ->walk() into
 * qdisc_class_dump().
 */
struct qdisc_dump_args
{
	struct qdisc_walker w;		/* must be first: cast target in qdisc_class_dump */
	struct sk_buff *skb;		/* dump destination */
	struct netlink_callback *cb;	/* supplies pid and sequence number */
};
1514
1515static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1516{
1517 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1518
1519 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1520 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1521}
1522
/*
 * Dump all classes of one qdisc, honoring the dump resume state:
 * @t_p counts qdiscs visited so far, @s_t is the index to resume from.
 * cb->args[1] holds the class-walk skip count within the current qdisc.
 * Returns 0 to continue, -1 when the skb filled up.
 */
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	/* Skip builtin qdiscs, already-dumped ones, classless qdiscs,
	 * and qdiscs not under the requested parent major.
	 */
	if (tc_qdisc_dump_ignore(q) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	/* Moving past the resume qdisc: clear the stale per-class state. */
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	/* Remember how many classes were emitted, for resume. */
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
1551
1552static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1553 struct tcmsg *tcm, struct netlink_callback *cb,
1554 int *t_p, int s_t)
1555{
1556 struct Qdisc *q;
1557
1558 if (!root)
1559 return 0;
1560
1561 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1562 return -1;
1563
1564 list_for_each_entry(q, &root->list, list) {
1565 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1566 return -1;
1567 }
1568
1569 return 0;
1570}
1571
/*
 * RTM_GETTCLASS dump handler: emit all classes of the addressed
 * device's egress and ingress qdisc hierarchies.  cb->args[0] stores
 * the qdisc index for dump resume.
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	/* Packet scheduling is not yet namespace-aware. */
	if (net != &init_net)
		return 0;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
		return 0;
	/* dev_get_by_index takes a device reference; dropped below. */
	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return 0;

	s_t = cb->args[0];
	t = 0;

	dev_queue = netdev_get_tx_queue(dev, 0);
	if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = &dev->rx_queue;
	if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
		goto done;

done:
	/* Save the resume position for the next invocation. */
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}
1605
1606/* Main classifier routine: scans classifier chain attached
1607 to this qdisc, (optionally) tests for protocol and asks
1608 specific classifiers.
1609 */
/*
 * Walk the filter chain @tp, trying each filter whose protocol matches
 * the skb (or is ETH_P_ALL).  Returns the first non-negative classify
 * verdict, or -1 when no filter matched.
 */
int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
		       struct tcf_result *res)
{
	__be16 protocol = skb->protocol;
	int err = 0;

	for (; tp; tp = tp->next) {
		if ((tp->protocol == protocol ||
		     tp->protocol == htons(ETH_P_ALL)) &&
		    (err = tp->classify(skb, tp, res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
			/* Final (non-reclassify) verdict: clear the verdict bits. */
			if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
				skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
#endif
			return err;
		}
	}
	return -1;
}
EXPORT_SYMBOL(tc_classify_compat);
1630
/*
 * Classify @skb against filter chain @tp.  With CONFIG_NET_CLS_ACT,
 * a TC_ACT_RECLASSIFY verdict restarts the walk from the head of the
 * chain, bounded by MAX_REC_LOOP iterations (then the packet is shot).
 */
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
	__be16 protocol;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;	/* head of chain, for reclassify restart */
reclassify:
#endif
	protocol = skb->protocol;

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		/* tc_verd tracks how many reclassify rounds have run. */
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			printk("rule prio %u protocol %02x reclassify loop, "
			       "packet dropped\n",
			       tp->prio&0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001661
Patrick McHardya48b5a62007-03-23 11:29:43 -07001662void tcf_destroy(struct tcf_proto *tp)
1663{
1664 tp->ops->destroy(tp);
1665 module_put(tp->ops->owner);
1666 kfree(tp);
1667}
1668
Patrick McHardyff31ab52008-07-01 19:52:38 -07001669void tcf_destroy_chain(struct tcf_proto **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001670{
1671 struct tcf_proto *tp;
1672
Patrick McHardyff31ab52008-07-01 19:52:38 -07001673 while ((tp = *fl) != NULL) {
1674 *fl = tp->next;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001675 tcf_destroy(tp);
1676 }
1677}
1678EXPORT_SYMBOL(tcf_destroy_chain);
1679
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680#ifdef CONFIG_PROC_FS
/*
 * /proc/net/psched contents: four hex fields describing the scheduler
 * clock (microsecond-to-tick scaling, tick-to-microsecond scaling,
 * legacy 1000000 constant, and the hrtimer resolution in Hz).
 */
static int psched_show(struct seq_file *seq, void *v)
{
	struct timespec ts;

	hrtimer_get_res(CLOCK_MONOTONIC, &ts);
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));

	return 0;
}
1693
1694static int psched_open(struct inode *inode, struct file *file)
1695{
1696 return single_open(file, psched_show, PDE(inode)->data);
1697}
1698
/* File operations for /proc/net/psched (read-only seq_file). */
static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read  = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706#endif
1707
/*
 * Subsystem init: register the built-in fifo qdiscs, create
 * /proc/net/psched, and hook the qdisc/class rtnetlink message types
 * to their handlers (dump callbacks only for the GET variants).
 */
static int __init pktsched_init(void)
{
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	proc_net_fops_create(&init_net, "psched", 0, &psched_fops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);

	return 0;
}

subsys_initcall(pktsched_init);