ipv4: minor spelling fix
[cascardo/linux.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78         },
79 };
80
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82         .data = {
83                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90         },
91 };
92
/*
 * Read attribute @attr from the devconf_dflt table of namespace @net.
 * @net is parenthesised so that an arbitrary pointer expression can be
 * passed safely; @attr is forwarded to IPV4_DEVCONF() untouched.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97         [IFA_LOCAL]             = { .type = NLA_U32 },
98         [IFA_ADDRESS]           = { .type = NLA_U32 },
99         [IFA_BROADCAST]         = { .type = NLA_U32 },
100         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
102         [IFA_FLAGS]             = { .type = NLA_U32 },
103 };
104
/*
 * Hash table of in_ifaddr entries, linked through in_ifaddr.hash.  The
 * bucket index comes from inet_addr_hash(); the table has
 * 1 << IN4_ADDR_HSIZE_SHIFT buckets.
 */
#define IN4_ADDR_HSIZE_SHIFT    8
#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109
110 static u32 inet_addr_hash(struct net *net, __be32 addr)
111 {
112         u32 val = (__force u32) addr ^ net_hash_mix(net);
113
114         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119         u32 hash = inet_addr_hash(net, ifa->ifa_local);
120
121         ASSERT_RTNL();
122         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124
/*
 * Unlink @ifa from the address hash table using the RCU-aware
 * hlist_del_init_rcu().  Asserts that RTNL is held.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
        ASSERT_RTNL();
        hlist_del_init_rcu(&ifa->hash);
}
130
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU, or RTNL
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141         u32 hash = inet_addr_hash(net, addr);
142         struct net_device *result = NULL;
143         struct in_ifaddr *ifa;
144
145         rcu_read_lock();
146         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147                 if (ifa->ifa_local == addr) {
148                         struct net_device *dev = ifa->ifa_dev->dev;
149
150                         if (!net_eq(dev_net(dev), net))
151                                 continue;
152                         result = dev;
153                         break;
154                 }
155         }
156         if (!result) {
157                 struct flowi4 fl4 = { .daddr = addr };
158                 struct fib_result res = { 0 };
159                 struct fib_table *local;
160
161                 /* Fallback to FIB local table so that communication
162                  * over loopback subnets work.
163                  */
164                 local = fib_get_table(net, RT_TABLE_LOCAL);
165                 if (local &&
166                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167                     res.type == RTN_LOCAL)
168                         result = FIB_RES_DEV(res);
169         }
170         if (result && devref)
171                 dev_hold(result);
172         rcu_read_unlock();
173         return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
176
/* Forward declarations for helpers that are used before their definitions. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain invoked with NETDEV_UP / NETDEV_DOWN and the affected ifa. */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy);
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the sysctl hooks compile down to empty stubs. */
static void devinet_sysctl_register(struct in_device *idev)
{
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
193
194 /* Locks all the inet devices. */
195
196 static struct in_ifaddr *inet_alloc_ifa(void)
197 {
198         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
199 }
200
201 static void inet_rcu_free_ifa(struct rcu_head *head)
202 {
203         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
204         if (ifa->ifa_dev)
205                 in_dev_put(ifa->ifa_dev);
206         kfree(ifa);
207 }
208
/*
 * Release @ifa.  The actual free is deferred through call_rcu(), with
 * inet_rcu_free_ifa() as the callback.
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
213
214 void in_dev_finish_destroy(struct in_device *idev)
215 {
216         struct net_device *dev = idev->dev;
217
218         WARN_ON(idev->ifa_list);
219         WARN_ON(idev->mc_list);
220         kfree(rcu_dereference_protected(idev->mc_hash, 1));
221 #ifdef NET_REFCNT_DEBUG
222         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
223 #endif
224         dev_put(dev);
225         if (!idev->dead)
226                 pr_err("Freeing alive in_device %p\n", idev);
227         else
228                 kfree(idev);
229 }
230 EXPORT_SYMBOL(in_dev_finish_destroy);
231
232 static struct in_device *inetdev_init(struct net_device *dev)
233 {
234         struct in_device *in_dev;
235
236         ASSERT_RTNL();
237
238         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
239         if (!in_dev)
240                 goto out;
241         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
242                         sizeof(in_dev->cnf));
243         in_dev->cnf.sysctl = NULL;
244         in_dev->dev = dev;
245         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
246         if (!in_dev->arp_parms)
247                 goto out_kfree;
248         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
249                 dev_disable_lro(dev);
250         /* Reference in_dev->dev */
251         dev_hold(dev);
252         /* Account for reference dev->ip_ptr (below) */
253         in_dev_hold(in_dev);
254
255         devinet_sysctl_register(in_dev);
256         ip_mc_init_dev(in_dev);
257         if (dev->flags & IFF_UP)
258                 ip_mc_up(in_dev);
259
260         /* we can receive as soon as ip_ptr is set -- do this last */
261         rcu_assign_pointer(dev->ip_ptr, in_dev);
262 out:
263         return in_dev;
264 out_kfree:
265         kfree(in_dev);
266         in_dev = NULL;
267         goto out;
268 }
269
270 static void in_dev_rcu_put(struct rcu_head *head)
271 {
272         struct in_device *idev = container_of(head, struct in_device, rcu_head);
273         in_dev_put(idev);
274 }
275
276 static void inetdev_destroy(struct in_device *in_dev)
277 {
278         struct in_ifaddr *ifa;
279         struct net_device *dev;
280
281         ASSERT_RTNL();
282
283         dev = in_dev->dev;
284
285         in_dev->dead = 1;
286
287         ip_mc_destroy_dev(in_dev);
288
289         while ((ifa = in_dev->ifa_list) != NULL) {
290                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
291                 inet_free_ifa(ifa);
292         }
293
294         RCU_INIT_POINTER(dev->ip_ptr, NULL);
295
296         devinet_sysctl_unregister(in_dev);
297         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
298         arp_ifdown(dev);
299
300         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
301 }
302
303 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
304 {
305         rcu_read_lock();
306         for_primary_ifa(in_dev) {
307                 if (inet_ifa_match(a, ifa)) {
308                         if (!b || inet_ifa_match(b, ifa)) {
309                                 rcu_read_unlock();
310                                 return 1;
311                         }
312                 }
313         } endfor_ifa(in_dev);
314         rcu_read_unlock();
315         return 0;
316 }
317
/*
 * Remove the address that *@ifap points to from @in_dev.
 *
 * @in_dev:  device the address belongs to
 * @ifap:    the list link (ifa_list or some ifa_next) that points at the
 *           address to delete
 * @destroy: if non-zero, the deleted address is freed via inet_free_ifa()
 * @nlh:     passed through to rtmsg_ifa()
 * @portid:  passed through to rtmsg_ifa()
 *
 * When a primary address is deleted, its matching secondaries are either
 * deleted too or, if IN_DEV_PROMOTE_SECONDARIES() is set, the first of
 * them is promoted to primary.  Asserts that RTNL is held.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy, struct nlmsghdr *nlh, u32 portid)
{
        struct in_ifaddr *promote = NULL;
        struct in_ifaddr *ifa, *ifa1 = *ifap;
        struct in_ifaddr *last_prim = in_dev->ifa_list;
        struct in_ifaddr *prev_prom = NULL;
        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

        ASSERT_RTNL();

        /* 1. Deleting a primary ifaddr forces deletion of all secondaries
         * unless alias promotion is set
         */

        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
                struct in_ifaddr **ifap1 = &ifa1->ifa_next;

                while ((ifa = *ifap1) != NULL) {
                        /* Track the last primary whose scope is not
                         * narrower than ifa1's; a promoted address is
                         * re-linked right after it.
                         */
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
                            ifa1->ifa_scope <= ifa->ifa_scope)
                                last_prim = ifa;

                        /* Skip entries that are not secondaries of ifa1. */
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
                            ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
                                ifap1 = &ifa->ifa_next;
                                prev_prom = ifa;
                                continue;
                        }

                        if (!do_promote) {
                                inet_hash_remove(ifa);
                                *ifap1 = ifa->ifa_next;

                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
                                blocking_notifier_call_chain(&inetaddr_chain,
                                                NETDEV_DOWN, ifa);
                                inet_free_ifa(ifa);
                        } else {
                                promote = ifa;
                                break;
                        }
                }
        }

        /* On promotion all secondaries from the subnet are changing
         * the primary IP, we must remove all their routes silently
         * and later add them back with the new prefsrc. Do this
         * while all addresses are on the device list.
         */
        for (ifa = promote; ifa; ifa = ifa->ifa_next) {
                if (ifa1->ifa_mask == ifa->ifa_mask &&
                    inet_ifa_match(ifa1->ifa_address, ifa))
                        fib_del_ifaddr(ifa, ifa1);
        }

        /* 2. Unlink it */

        *ifap = ifa1->ifa_next;
        inet_hash_remove(ifa1);

        /* 3. Announce address deletion */

        /* Send message first, then call notifier.
           At first sight, FIB update triggered by notifier
           will refer to already deleted ifaddr, that could confuse
           netlink listeners. It is not true: look, gated sees
           that route deleted and if it still thinks that ifaddr
           is valid, it will try to restore deleted routes... Grr.
           So that, this order is correct.
         */
        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

        if (promote) {
                struct in_ifaddr *next_sec = promote->ifa_next;

                /* Move the promoted address behind last_prim. */
                if (prev_prom) {
                        prev_prom->ifa_next = promote->ifa_next;
                        promote->ifa_next = last_prim->ifa_next;
                        last_prim->ifa_next = promote;
                }

                promote->ifa_flags &= ~IFA_F_SECONDARY;
                rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
                blocking_notifier_call_chain(&inetaddr_chain,
                                NETDEV_UP, promote);
                /* Re-add the routes of the remaining secondaries. */
                for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
                        if (ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa))
                                        continue;
                        fib_add_ifaddr(ifa);
                }

        }
        if (destroy)
                inet_free_ifa(ifa1);
}
417
/*
 * Convenience wrapper around __inet_del_ifa() that passes a NULL netlink
 * header and a port id of 0.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy)
{
        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
423
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
427
428 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
429                              u32 portid)
430 {
431         struct in_device *in_dev = ifa->ifa_dev;
432         struct in_ifaddr *ifa1, **ifap, **last_primary;
433
434         ASSERT_RTNL();
435
436         if (!ifa->ifa_local) {
437                 inet_free_ifa(ifa);
438                 return 0;
439         }
440
441         ifa->ifa_flags &= ~IFA_F_SECONDARY;
442         last_primary = &in_dev->ifa_list;
443
444         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
445              ifap = &ifa1->ifa_next) {
446                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
447                     ifa->ifa_scope <= ifa1->ifa_scope)
448                         last_primary = &ifa1->ifa_next;
449                 if (ifa1->ifa_mask == ifa->ifa_mask &&
450                     inet_ifa_match(ifa1->ifa_address, ifa)) {
451                         if (ifa1->ifa_local == ifa->ifa_local) {
452                                 inet_free_ifa(ifa);
453                                 return -EEXIST;
454                         }
455                         if (ifa1->ifa_scope != ifa->ifa_scope) {
456                                 inet_free_ifa(ifa);
457                                 return -EINVAL;
458                         }
459                         ifa->ifa_flags |= IFA_F_SECONDARY;
460                 }
461         }
462
463         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
464                 prandom_seed((__force u32) ifa->ifa_local);
465                 ifap = last_primary;
466         }
467
468         ifa->ifa_next = *ifap;
469         *ifap = ifa;
470
471         inet_hash_insert(dev_net(in_dev->dev), ifa);
472
473         cancel_delayed_work(&check_lifetime_work);
474         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
475
476         /* Send message first, then call notifier.
477            Notifier will trigger FIB update, so that
478            listeners of netlink will know about new ifaddr */
479         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
480         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
481
482         return 0;
483 }
484
/*
 * Convenience wrapper around __inet_insert_ifa() that passes a NULL
 * netlink header and a port id of 0; returns its result unchanged.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
        return __inet_insert_ifa(ifa, NULL, 0);
}
489
490 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
491 {
492         struct in_device *in_dev = __in_dev_get_rtnl(dev);
493
494         ASSERT_RTNL();
495
496         if (!in_dev) {
497                 inet_free_ifa(ifa);
498                 return -ENOBUFS;
499         }
500         ipv4_devconf_setall(in_dev);
501         neigh_parms_data_state_setall(in_dev->arp_parms);
502         if (ifa->ifa_dev != in_dev) {
503                 WARN_ON(ifa->ifa_dev);
504                 in_dev_hold(in_dev);
505                 ifa->ifa_dev = in_dev;
506         }
507         if (ipv4_is_loopback(ifa->ifa_local))
508                 ifa->ifa_scope = RT_SCOPE_HOST;
509         return inet_insert_ifa(ifa);
510 }
511
512 /* Caller must hold RCU or RTNL :
513  * We dont take a reference on found in_device
514  */
515 struct in_device *inetdev_by_index(struct net *net, int ifindex)
516 {
517         struct net_device *dev;
518         struct in_device *in_dev = NULL;
519
520         rcu_read_lock();
521         dev = dev_get_by_index_rcu(net, ifindex);
522         if (dev)
523                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
524         rcu_read_unlock();
525         return in_dev;
526 }
527 EXPORT_SYMBOL(inetdev_by_index);
528
529 /* Called only from RTNL semaphored context. No locks. */
530
531 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
532                                     __be32 mask)
533 {
534         ASSERT_RTNL();
535
536         for_primary_ifa(in_dev) {
537                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
538                         return ifa;
539         } endfor_ifa(in_dev);
540         return NULL;
541 }
542
543 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
544 {
545         struct net *net = sock_net(skb->sk);
546         struct nlattr *tb[IFA_MAX+1];
547         struct in_device *in_dev;
548         struct ifaddrmsg *ifm;
549         struct in_ifaddr *ifa, **ifap;
550         int err = -EINVAL;
551
552         ASSERT_RTNL();
553
554         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
555         if (err < 0)
556                 goto errout;
557
558         ifm = nlmsg_data(nlh);
559         in_dev = inetdev_by_index(net, ifm->ifa_index);
560         if (in_dev == NULL) {
561                 err = -ENODEV;
562                 goto errout;
563         }
564
565         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
566              ifap = &ifa->ifa_next) {
567                 if (tb[IFA_LOCAL] &&
568                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
569                         continue;
570
571                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
572                         continue;
573
574                 if (tb[IFA_ADDRESS] &&
575                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
576                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
577                         continue;
578
579                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
580                 return 0;
581         }
582
583         err = -EADDRNOTAVAIL;
584 errout:
585         return err;
586 }
587
/* Lifetime value that means "never expires". */
#define INFINITY_LIFE_TIME      0xFFFFFFFF

/*
 * Delayed-work handler that enforces address lifetimes.
 *
 * Every hash bucket is scanned twice.  The first pass runs under
 * rcu_read_lock() only; it decides whether anything in the bucket has to
 * change and tracks the earliest time ("next") at which a lifetime will
 * run out.  Only if a change is needed is RTNL taken for the second pass,
 * which deletes addresses whose valid lifetime has expired and marks
 * addresses whose preferred lifetime has expired as IFA_F_DEPRECATED.
 * Finally the work item re-queues itself.
 */
static void check_lifetime(struct work_struct *work)
{
        unsigned long now, next, next_sec, next_sched;
        struct in_ifaddr *ifa;
        struct hlist_node *n;
        int i;

        now = jiffies;
        next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

        for (i = 0; i < IN4_ADDR_HSIZE; i++) {
                bool change_needed = false;

                /* Pass 1: read-only scan. */
                rcu_read_lock();
                hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
                        unsigned long age;

                        if (ifa->ifa_flags & IFA_F_PERMANENT)
                                continue;

                        /* We try to batch several events at once. */
                        age = (now - ifa->ifa_tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
                            age >= ifa->ifa_valid_lft) {
                                /* Valid lifetime expired: must be deleted. */
                                change_needed = true;
                        } else if (ifa->ifa_preferred_lft ==
                                   INFINITY_LIFE_TIME) {
                                continue;
                        } else if (age >= ifa->ifa_preferred_lft) {
                                /* Preferred lifetime expired; the next
                                 * event for this address is the end of
                                 * its valid lifetime.
                                 */
                                if (time_before(ifa->ifa_tstamp +
                                                ifa->ifa_valid_lft * HZ, next))
                                        next = ifa->ifa_tstamp +
                                               ifa->ifa_valid_lft * HZ;

                                if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
                                        change_needed = true;
                        } else if (time_before(ifa->ifa_tstamp +
                                               ifa->ifa_preferred_lft * HZ,
                                               next)) {
                                /* Still preferred: wake up when that ends. */
                                next = ifa->ifa_tstamp +
                                       ifa->ifa_preferred_lft * HZ;
                        }
                }
                rcu_read_unlock();
                if (!change_needed)
                        continue;
                /* Pass 2: apply the changes under RTNL. */
                rtnl_lock();
                hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
                        unsigned long age;

                        if (ifa->ifa_flags & IFA_F_PERMANENT)
                                continue;

                        /* We try to batch several events at once. */
                        age = (now - ifa->ifa_tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
                            age >= ifa->ifa_valid_lft) {
                                struct in_ifaddr **ifap;

                                /* Find the list link pointing at ifa, as
                                 * inet_del_ifa() needs it for unlinking.
                                 */
                                for (ifap = &ifa->ifa_dev->ifa_list;
                                     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
                                        if (*ifap == ifa) {
                                                inet_del_ifa(ifa->ifa_dev,
                                                             ifap, 1);
                                                break;
                                        }
                                }
                        } else if (ifa->ifa_preferred_lft !=
                                   INFINITY_LIFE_TIME &&
                                   age >= ifa->ifa_preferred_lft &&
                                   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
                                ifa->ifa_flags |= IFA_F_DEPRECATED;
                                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
                        }
                }
                rtnl_unlock();
        }

        next_sec = round_jiffies_up(next);
        next_sched = next;

        /* If rounded timeout is accurate enough, accept it. */
        if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
                next_sched = next_sec;

        now = jiffies;
        /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
        if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
                next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
                        next_sched - now);
}
687
688 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
689                              __u32 prefered_lft)
690 {
691         unsigned long timeout;
692
693         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
694
695         timeout = addrconf_timeout_fixup(valid_lft, HZ);
696         if (addrconf_finite_timeout(timeout))
697                 ifa->ifa_valid_lft = timeout;
698         else
699                 ifa->ifa_flags |= IFA_F_PERMANENT;
700
701         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
702         if (addrconf_finite_timeout(timeout)) {
703                 if (timeout == 0)
704                         ifa->ifa_flags |= IFA_F_DEPRECATED;
705                 ifa->ifa_preferred_lft = timeout;
706         }
707         ifa->ifa_tstamp = jiffies;
708         if (!ifa->ifa_cstamp)
709                 ifa->ifa_cstamp = ifa->ifa_tstamp;
710 }
711
712 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
713                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
714 {
715         struct nlattr *tb[IFA_MAX+1];
716         struct in_ifaddr *ifa;
717         struct ifaddrmsg *ifm;
718         struct net_device *dev;
719         struct in_device *in_dev;
720         int err;
721
722         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
723         if (err < 0)
724                 goto errout;
725
726         ifm = nlmsg_data(nlh);
727         err = -EINVAL;
728         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
729                 goto errout;
730
731         dev = __dev_get_by_index(net, ifm->ifa_index);
732         err = -ENODEV;
733         if (dev == NULL)
734                 goto errout;
735
736         in_dev = __in_dev_get_rtnl(dev);
737         err = -ENOBUFS;
738         if (in_dev == NULL)
739                 goto errout;
740
741         ifa = inet_alloc_ifa();
742         if (ifa == NULL)
743                 /*
744                  * A potential indev allocation can be left alive, it stays
745                  * assigned to its device and is destroy with it.
746                  */
747                 goto errout;
748
749         ipv4_devconf_setall(in_dev);
750         neigh_parms_data_state_setall(in_dev->arp_parms);
751         in_dev_hold(in_dev);
752
753         if (tb[IFA_ADDRESS] == NULL)
754                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
755
756         INIT_HLIST_NODE(&ifa->hash);
757         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
758         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
759         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
760                                          ifm->ifa_flags;
761         ifa->ifa_scope = ifm->ifa_scope;
762         ifa->ifa_dev = in_dev;
763
764         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
765         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
766
767         if (tb[IFA_BROADCAST])
768                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
769
770         if (tb[IFA_LABEL])
771                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
772         else
773                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
774
775         if (tb[IFA_CACHEINFO]) {
776                 struct ifa_cacheinfo *ci;
777
778                 ci = nla_data(tb[IFA_CACHEINFO]);
779                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
780                         err = -EINVAL;
781                         goto errout_free;
782                 }
783                 *pvalid_lft = ci->ifa_valid;
784                 *pprefered_lft = ci->ifa_prefered;
785         }
786
787         return ifa;
788
789 errout_free:
790         inet_free_ifa(ifa);
791 errout:
792         return ERR_PTR(err);
793 }
794
795 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
796 {
797         struct in_device *in_dev = ifa->ifa_dev;
798         struct in_ifaddr *ifa1, **ifap;
799
800         if (!ifa->ifa_local)
801                 return NULL;
802
803         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
804              ifap = &ifa1->ifa_next) {
805                 if (ifa1->ifa_mask == ifa->ifa_mask &&
806                     inet_ifa_match(ifa1->ifa_address, ifa) &&
807                     ifa1->ifa_local == ifa->ifa_local)
808                         return ifa1;
809         }
810         return NULL;
811 }
812
813 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
814 {
815         struct net *net = sock_net(skb->sk);
816         struct in_ifaddr *ifa;
817         struct in_ifaddr *ifa_existing;
818         __u32 valid_lft = INFINITY_LIFE_TIME;
819         __u32 prefered_lft = INFINITY_LIFE_TIME;
820
821         ASSERT_RTNL();
822
823         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
824         if (IS_ERR(ifa))
825                 return PTR_ERR(ifa);
826
827         ifa_existing = find_matching_ifa(ifa);
828         if (!ifa_existing) {
829                 /* It would be best to check for !NLM_F_CREATE here but
830                  * userspace already relies on not having to provide this.
831                  */
832                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
833                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
834         } else {
835                 inet_free_ifa(ifa);
836
837                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
838                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
839                         return -EEXIST;
840                 ifa = ifa_existing;
841                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
842                 cancel_delayed_work(&check_lifetime_work);
843                 queue_delayed_work(system_power_efficient_wq,
844                                 &check_lifetime_work, 0);
845                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
846                 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
847         }
848         return 0;
849 }
850
851 /*
852  *      Determine a default network mask, based on the IP address.
853  */
854
855 static int inet_abc_len(__be32 addr)
856 {
857         int rc = -1;    /* Something else, probably a multicast. */
858
859         if (ipv4_is_zeronet(addr))
860                 rc = 0;
861         else {
862                 __u32 haddr = ntohl(addr);
863
864                 if (IN_CLASSA(haddr))
865                         rc = 8;
866                 else if (IN_CLASSB(haddr))
867                         rc = 16;
868                 else if (IN_CLASSC(haddr))
869                         rc = 24;
870         }
871
872         return rc;
873 }
874
875
876 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
877 {
878         struct ifreq ifr;
879         struct sockaddr_in sin_orig;
880         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
881         struct in_device *in_dev;
882         struct in_ifaddr **ifap = NULL;
883         struct in_ifaddr *ifa = NULL;
884         struct net_device *dev;
885         char *colon;
886         int ret = -EFAULT;
887         int tryaddrmatch = 0;
888
889         /*
890          *      Fetch the caller's info block into kernel space
891          */
892
893         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
894                 goto out;
895         ifr.ifr_name[IFNAMSIZ - 1] = 0;
896
897         /* save original address for comparison */
898         memcpy(&sin_orig, sin, sizeof(*sin));
899
900         colon = strchr(ifr.ifr_name, ':');
901         if (colon)
902                 *colon = 0;
903
904         dev_load(net, ifr.ifr_name);
905
906         switch (cmd) {
907         case SIOCGIFADDR:       /* Get interface address */
908         case SIOCGIFBRDADDR:    /* Get the broadcast address */
909         case SIOCGIFDSTADDR:    /* Get the destination address */
910         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
911                 /* Note that these ioctls will not sleep,
912                    so that we do not impose a lock.
913                    One day we will be forced to put shlock here (I mean SMP)
914                  */
915                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
916                 memset(sin, 0, sizeof(*sin));
917                 sin->sin_family = AF_INET;
918                 break;
919
920         case SIOCSIFFLAGS:
921                 ret = -EPERM;
922                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
923                         goto out;
924                 break;
925         case SIOCSIFADDR:       /* Set interface address (and family) */
926         case SIOCSIFBRDADDR:    /* Set the broadcast address */
927         case SIOCSIFDSTADDR:    /* Set the destination address */
928         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
929                 ret = -EPERM;
930                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
931                         goto out;
932                 ret = -EINVAL;
933                 if (sin->sin_family != AF_INET)
934                         goto out;
935                 break;
936         default:
937                 ret = -EINVAL;
938                 goto out;
939         }
940
941         rtnl_lock();
942
943         ret = -ENODEV;
944         dev = __dev_get_by_name(net, ifr.ifr_name);
945         if (!dev)
946                 goto done;
947
948         if (colon)
949                 *colon = ':';
950
951         in_dev = __in_dev_get_rtnl(dev);
952         if (in_dev) {
953                 if (tryaddrmatch) {
954                         /* Matthias Andree */
955                         /* compare label and address (4.4BSD style) */
956                         /* note: we only do this for a limited set of ioctls
957                            and only if the original address family was AF_INET.
958                            This is checked above. */
959                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
960                              ifap = &ifa->ifa_next) {
961                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
962                                     sin_orig.sin_addr.s_addr ==
963                                                         ifa->ifa_local) {
964                                         break; /* found */
965                                 }
966                         }
967                 }
968                 /* we didn't get a match, maybe the application is
969                    4.3BSD-style and passed in junk so we fall back to
970                    comparing just the label */
971                 if (!ifa) {
972                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
973                              ifap = &ifa->ifa_next)
974                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
975                                         break;
976                 }
977         }
978
979         ret = -EADDRNOTAVAIL;
980         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
981                 goto done;
982
983         switch (cmd) {
984         case SIOCGIFADDR:       /* Get interface address */
985                 sin->sin_addr.s_addr = ifa->ifa_local;
986                 goto rarok;
987
988         case SIOCGIFBRDADDR:    /* Get the broadcast address */
989                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
990                 goto rarok;
991
992         case SIOCGIFDSTADDR:    /* Get the destination address */
993                 sin->sin_addr.s_addr = ifa->ifa_address;
994                 goto rarok;
995
996         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
997                 sin->sin_addr.s_addr = ifa->ifa_mask;
998                 goto rarok;
999
1000         case SIOCSIFFLAGS:
1001                 if (colon) {
1002                         ret = -EADDRNOTAVAIL;
1003                         if (!ifa)
1004                                 break;
1005                         ret = 0;
1006                         if (!(ifr.ifr_flags & IFF_UP))
1007                                 inet_del_ifa(in_dev, ifap, 1);
1008                         break;
1009                 }
1010                 ret = dev_change_flags(dev, ifr.ifr_flags);
1011                 break;
1012
1013         case SIOCSIFADDR:       /* Set interface address (and family) */
1014                 ret = -EINVAL;
1015                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1016                         break;
1017
1018                 if (!ifa) {
1019                         ret = -ENOBUFS;
1020                         ifa = inet_alloc_ifa();
1021                         if (!ifa)
1022                                 break;
1023                         INIT_HLIST_NODE(&ifa->hash);
1024                         if (colon)
1025                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1026                         else
1027                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1028                 } else {
1029                         ret = 0;
1030                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1031                                 break;
1032                         inet_del_ifa(in_dev, ifap, 0);
1033                         ifa->ifa_broadcast = 0;
1034                         ifa->ifa_scope = 0;
1035                 }
1036
1037                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1038
1039                 if (!(dev->flags & IFF_POINTOPOINT)) {
1040                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1041                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1042                         if ((dev->flags & IFF_BROADCAST) &&
1043                             ifa->ifa_prefixlen < 31)
1044                                 ifa->ifa_broadcast = ifa->ifa_address |
1045                                                      ~ifa->ifa_mask;
1046                 } else {
1047                         ifa->ifa_prefixlen = 32;
1048                         ifa->ifa_mask = inet_make_mask(32);
1049                 }
1050                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1051                 ret = inet_set_ifa(dev, ifa);
1052                 break;
1053
1054         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1055                 ret = 0;
1056                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1057                         inet_del_ifa(in_dev, ifap, 0);
1058                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1059                         inet_insert_ifa(ifa);
1060                 }
1061                 break;
1062
1063         case SIOCSIFDSTADDR:    /* Set the destination address */
1064                 ret = 0;
1065                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1066                         break;
1067                 ret = -EINVAL;
1068                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1069                         break;
1070                 ret = 0;
1071                 inet_del_ifa(in_dev, ifap, 0);
1072                 ifa->ifa_address = sin->sin_addr.s_addr;
1073                 inet_insert_ifa(ifa);
1074                 break;
1075
1076         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1077
1078                 /*
1079                  *      The mask we set must be legal.
1080                  */
1081                 ret = -EINVAL;
1082                 if (bad_mask(sin->sin_addr.s_addr, 0))
1083                         break;
1084                 ret = 0;
1085                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1086                         __be32 old_mask = ifa->ifa_mask;
1087                         inet_del_ifa(in_dev, ifap, 0);
1088                         ifa->ifa_mask = sin->sin_addr.s_addr;
1089                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1090
1091                         /* See if current broadcast address matches
1092                          * with current netmask, then recalculate
1093                          * the broadcast address. Otherwise it's a
1094                          * funny address, so don't touch it since
1095                          * the user seems to know what (s)he's doing...
1096                          */
1097                         if ((dev->flags & IFF_BROADCAST) &&
1098                             (ifa->ifa_prefixlen < 31) &&
1099                             (ifa->ifa_broadcast ==
1100                              (ifa->ifa_local|~old_mask))) {
1101                                 ifa->ifa_broadcast = (ifa->ifa_local |
1102                                                       ~sin->sin_addr.s_addr);
1103                         }
1104                         inet_insert_ifa(ifa);
1105                 }
1106                 break;
1107         }
1108 done:
1109         rtnl_unlock();
1110 out:
1111         return ret;
1112 rarok:
1113         rtnl_unlock();
1114         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1115         goto out;
1116 }
1117
1118 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1119 {
1120         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1121         struct in_ifaddr *ifa;
1122         struct ifreq ifr;
1123         int done = 0;
1124
1125         if (!in_dev)
1126                 goto out;
1127
1128         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1129                 if (!buf) {
1130                         done += sizeof(ifr);
1131                         continue;
1132                 }
1133                 if (len < (int) sizeof(ifr))
1134                         break;
1135                 memset(&ifr, 0, sizeof(struct ifreq));
1136                 strcpy(ifr.ifr_name, ifa->ifa_label);
1137
1138                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1139                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1140                                                                 ifa->ifa_local;
1141
1142                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1143                         done = -EFAULT;
1144                         break;
1145                 }
1146                 buf  += sizeof(struct ifreq);
1147                 len  -= sizeof(struct ifreq);
1148                 done += sizeof(struct ifreq);
1149         }
1150 out:
1151         return done;
1152 }
1153
1154 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1155 {
1156         __be32 addr = 0;
1157         struct in_device *in_dev;
1158         struct net *net = dev_net(dev);
1159
1160         rcu_read_lock();
1161         in_dev = __in_dev_get_rcu(dev);
1162         if (!in_dev)
1163                 goto no_in_dev;
1164
1165         for_primary_ifa(in_dev) {
1166                 if (ifa->ifa_scope > scope)
1167                         continue;
1168                 if (!dst || inet_ifa_match(dst, ifa)) {
1169                         addr = ifa->ifa_local;
1170                         break;
1171                 }
1172                 if (!addr)
1173                         addr = ifa->ifa_local;
1174         } endfor_ifa(in_dev);
1175
1176         if (addr)
1177                 goto out_unlock;
1178 no_in_dev:
1179
1180         /* Not loopback addresses on loopback should be preferred
1181            in this case. It is importnat that lo is the first interface
1182            in dev_base list.
1183          */
1184         for_each_netdev_rcu(net, dev) {
1185                 in_dev = __in_dev_get_rcu(dev);
1186                 if (!in_dev)
1187                         continue;
1188
1189                 for_primary_ifa(in_dev) {
1190                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1191                             ifa->ifa_scope <= scope) {
1192                                 addr = ifa->ifa_local;
1193                                 goto out_unlock;
1194                         }
1195                 } endfor_ifa(in_dev);
1196         }
1197 out_unlock:
1198         rcu_read_unlock();
1199         return addr;
1200 }
1201 EXPORT_SYMBOL(inet_select_addr);
1202
1203 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1204                               __be32 local, int scope)
1205 {
1206         int same = 0;
1207         __be32 addr = 0;
1208
1209         for_ifa(in_dev) {
1210                 if (!addr &&
1211                     (local == ifa->ifa_local || !local) &&
1212                     ifa->ifa_scope <= scope) {
1213                         addr = ifa->ifa_local;
1214                         if (same)
1215                                 break;
1216                 }
1217                 if (!same) {
1218                         same = (!local || inet_ifa_match(local, ifa)) &&
1219                                 (!dst || inet_ifa_match(dst, ifa));
1220                         if (same && addr) {
1221                                 if (local || !dst)
1222                                         break;
1223                                 /* Is the selected addr into dst subnet? */
1224                                 if (inet_ifa_match(addr, ifa))
1225                                         break;
1226                                 /* No, then can we use new local src? */
1227                                 if (ifa->ifa_scope <= scope) {
1228                                         addr = ifa->ifa_local;
1229                                         break;
1230                                 }
1231                                 /* search for large dst subnet for addr */
1232                                 same = 0;
1233                         }
1234                 }
1235         } endfor_ifa(in_dev);
1236
1237         return same ? addr : 0;
1238 }
1239
1240 /*
1241  * Confirm that local IP address exists using wildcards:
1242  * - net: netns to check, cannot be NULL
1243  * - in_dev: only on this interface, NULL=any interface
1244  * - dst: only in the same subnet as dst, 0=any dst
1245  * - local: address, 0=autoselect the local address
1246  * - scope: maximum allowed scope value for the local address
1247  */
1248 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1249                          __be32 dst, __be32 local, int scope)
1250 {
1251         __be32 addr = 0;
1252         struct net_device *dev;
1253
1254         if (in_dev != NULL)
1255                 return confirm_addr_indev(in_dev, dst, local, scope);
1256
1257         rcu_read_lock();
1258         for_each_netdev_rcu(net, dev) {
1259                 in_dev = __in_dev_get_rcu(dev);
1260                 if (in_dev) {
1261                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1262                         if (addr)
1263                                 break;
1264                 }
1265         }
1266         rcu_read_unlock();
1267
1268         return addr;
1269 }
1270 EXPORT_SYMBOL(inet_confirm_addr);
1271
1272 /*
1273  *      Device notifier
1274  */
1275
1276 int register_inetaddr_notifier(struct notifier_block *nb)
1277 {
1278         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1279 }
1280 EXPORT_SYMBOL(register_inetaddr_notifier);
1281
1282 int unregister_inetaddr_notifier(struct notifier_block *nb)
1283 {
1284         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1285 }
1286 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1287
1288 /* Rename ifa_labels for a device name change. Make some effort to preserve
1289  * existing alias numbering and to create unique labels if possible.
1290 */
1291 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1292 {
1293         struct in_ifaddr *ifa;
1294         int named = 0;
1295
1296         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1297                 char old[IFNAMSIZ], *dot;
1298
1299                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1300                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1301                 if (named++ == 0)
1302                         goto skip;
1303                 dot = strchr(old, ':');
1304                 if (dot == NULL) {
1305                         sprintf(old, ":%d", named);
1306                         dot = old;
1307                 }
1308                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1309                         strcat(ifa->ifa_label, dot);
1310                 else
1311                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1312 skip:
1313                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1314         }
1315 }
1316
/* IPv4 is only usable on a device whose MTU is at least 68 bytes. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1321
1322 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1323                                         struct in_device *in_dev)
1324
1325 {
1326         struct in_ifaddr *ifa;
1327
1328         for (ifa = in_dev->ifa_list; ifa;
1329              ifa = ifa->ifa_next) {
1330                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1331                          ifa->ifa_local, dev,
1332                          ifa->ifa_local, NULL,
1333                          dev->dev_addr, NULL);
1334         }
1335 }
1336
1337 /* Called only under RTNL semaphore */
1338
1339 static int inetdev_event(struct notifier_block *this, unsigned long event,
1340                          void *ptr)
1341 {
1342         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1343         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1344
1345         ASSERT_RTNL();
1346
1347         if (!in_dev) {
1348                 if (event == NETDEV_REGISTER) {
1349                         in_dev = inetdev_init(dev);
1350                         if (!in_dev)
1351                                 return notifier_from_errno(-ENOMEM);
1352                         if (dev->flags & IFF_LOOPBACK) {
1353                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1354                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1355                         }
1356                 } else if (event == NETDEV_CHANGEMTU) {
1357                         /* Re-enabling IP */
1358                         if (inetdev_valid_mtu(dev->mtu))
1359                                 in_dev = inetdev_init(dev);
1360                 }
1361                 goto out;
1362         }
1363
1364         switch (event) {
1365         case NETDEV_REGISTER:
1366                 pr_debug("%s: bug\n", __func__);
1367                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1368                 break;
1369         case NETDEV_UP:
1370                 if (!inetdev_valid_mtu(dev->mtu))
1371                         break;
1372                 if (dev->flags & IFF_LOOPBACK) {
1373                         struct in_ifaddr *ifa = inet_alloc_ifa();
1374
1375                         if (ifa) {
1376                                 INIT_HLIST_NODE(&ifa->hash);
1377                                 ifa->ifa_local =
1378                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1379                                 ifa->ifa_prefixlen = 8;
1380                                 ifa->ifa_mask = inet_make_mask(8);
1381                                 in_dev_hold(in_dev);
1382                                 ifa->ifa_dev = in_dev;
1383                                 ifa->ifa_scope = RT_SCOPE_HOST;
1384                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1385                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1386                                                  INFINITY_LIFE_TIME);
1387                                 ipv4_devconf_setall(in_dev);
1388                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1389                                 inet_insert_ifa(ifa);
1390                         }
1391                 }
1392                 ip_mc_up(in_dev);
1393                 /* fall through */
1394         case NETDEV_CHANGEADDR:
1395                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1396                         break;
1397                 /* fall through */
1398         case NETDEV_NOTIFY_PEERS:
1399                 /* Send gratuitous ARP to notify of link change */
1400                 inetdev_send_gratuitous_arp(dev, in_dev);
1401                 break;
1402         case NETDEV_DOWN:
1403                 ip_mc_down(in_dev);
1404                 break;
1405         case NETDEV_PRE_TYPE_CHANGE:
1406                 ip_mc_unmap(in_dev);
1407                 break;
1408         case NETDEV_POST_TYPE_CHANGE:
1409                 ip_mc_remap(in_dev);
1410                 break;
1411         case NETDEV_CHANGEMTU:
1412                 if (inetdev_valid_mtu(dev->mtu))
1413                         break;
1414                 /* disable IP when MTU is not enough */
1415         case NETDEV_UNREGISTER:
1416                 inetdev_destroy(in_dev);
1417                 break;
1418         case NETDEV_CHANGENAME:
1419                 /* Do not notify about label change, this event is
1420                  * not interesting to applications using netlink.
1421                  */
1422                 inetdev_changename(dev, in_dev);
1423
1424                 devinet_sysctl_unregister(in_dev);
1425                 devinet_sysctl_register(in_dev);
1426                 break;
1427         }
1428 out:
1429         return NOTIFY_DONE;
1430 }
1431
1432 static struct notifier_block ip_netdev_notifier = {
1433         .notifier_call = inetdev_event,
1434 };
1435
1436 static size_t inet_nlmsg_size(void)
1437 {
1438         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1439                + nla_total_size(4) /* IFA_ADDRESS */
1440                + nla_total_size(4) /* IFA_LOCAL */
1441                + nla_total_size(4) /* IFA_BROADCAST */
1442                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1443                + nla_total_size(4)  /* IFA_FLAGS */
1444                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1445 }
1446
1447 static inline u32 cstamp_delta(unsigned long cstamp)
1448 {
1449         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1450 }
1451
1452 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1453                          unsigned long tstamp, u32 preferred, u32 valid)
1454 {
1455         struct ifa_cacheinfo ci;
1456
1457         ci.cstamp = cstamp_delta(cstamp);
1458         ci.tstamp = cstamp_delta(tstamp);
1459         ci.ifa_prefered = preferred;
1460         ci.ifa_valid = valid;
1461
1462         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1463 }
1464
1465 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1466                             u32 portid, u32 seq, int event, unsigned int flags)
1467 {
1468         struct ifaddrmsg *ifm;
1469         struct nlmsghdr  *nlh;
1470         u32 preferred, valid;
1471
1472         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1473         if (nlh == NULL)
1474                 return -EMSGSIZE;
1475
1476         ifm = nlmsg_data(nlh);
1477         ifm->ifa_family = AF_INET;
1478         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1479         ifm->ifa_flags = ifa->ifa_flags;
1480         ifm->ifa_scope = ifa->ifa_scope;
1481         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1482
1483         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1484                 preferred = ifa->ifa_preferred_lft;
1485                 valid = ifa->ifa_valid_lft;
1486                 if (preferred != INFINITY_LIFE_TIME) {
1487                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1488
1489                         if (preferred > tval)
1490                                 preferred -= tval;
1491                         else
1492                                 preferred = 0;
1493                         if (valid != INFINITY_LIFE_TIME) {
1494                                 if (valid > tval)
1495                                         valid -= tval;
1496                                 else
1497                                         valid = 0;
1498                         }
1499                 }
1500         } else {
1501                 preferred = INFINITY_LIFE_TIME;
1502                 valid = INFINITY_LIFE_TIME;
1503         }
1504         if ((ifa->ifa_address &&
1505              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1506             (ifa->ifa_local &&
1507              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1508             (ifa->ifa_broadcast &&
1509              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1510             (ifa->ifa_label[0] &&
1511              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1512             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1513             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1514                           preferred, valid))
1515                 goto nla_put_failure;
1516
1517         return nlmsg_end(skb, nlh);
1518
1519 nla_put_failure:
1520         nlmsg_cancel(skb, nlh);
1521         return -EMSGSIZE;
1522 }
1523
1524 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1525 {
1526         struct net *net = sock_net(skb->sk);
1527         int h, s_h;
1528         int idx, s_idx;
1529         int ip_idx, s_ip_idx;
1530         struct net_device *dev;
1531         struct in_device *in_dev;
1532         struct in_ifaddr *ifa;
1533         struct hlist_head *head;
1534
1535         s_h = cb->args[0];
1536         s_idx = idx = cb->args[1];
1537         s_ip_idx = ip_idx = cb->args[2];
1538
1539         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1540                 idx = 0;
1541                 head = &net->dev_index_head[h];
1542                 rcu_read_lock();
1543                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1544                           net->dev_base_seq;
1545                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1546                         if (idx < s_idx)
1547                                 goto cont;
1548                         if (h > s_h || idx > s_idx)
1549                                 s_ip_idx = 0;
1550                         in_dev = __in_dev_get_rcu(dev);
1551                         if (!in_dev)
1552                                 goto cont;
1553
1554                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1555                              ifa = ifa->ifa_next, ip_idx++) {
1556                                 if (ip_idx < s_ip_idx)
1557                                         continue;
1558                                 if (inet_fill_ifaddr(skb, ifa,
1559                                              NETLINK_CB(cb->skb).portid,
1560                                              cb->nlh->nlmsg_seq,
1561                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1562                                         rcu_read_unlock();
1563                                         goto done;
1564                                 }
1565                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1566                         }
1567 cont:
1568                         idx++;
1569                 }
1570                 rcu_read_unlock();
1571         }
1572
1573 done:
1574         cb->args[0] = h;
1575         cb->args[1] = idx;
1576         cb->args[2] = ip_idx;
1577
1578         return skb->len;
1579 }
1580
1581 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1582                       u32 portid)
1583 {
1584         struct sk_buff *skb;
1585         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1586         int err = -ENOBUFS;
1587         struct net *net;
1588
1589         net = dev_net(ifa->ifa_dev->dev);
1590         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1591         if (skb == NULL)
1592                 goto errout;
1593
1594         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1595         if (err < 0) {
1596                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1597                 WARN_ON(err == -EMSGSIZE);
1598                 kfree_skb(skb);
1599                 goto errout;
1600         }
1601         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1602         return;
1603 errout:
1604         if (err < 0)
1605                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1606 }
1607
1608 static size_t inet_get_link_af_size(const struct net_device *dev)
1609 {
1610         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1611
1612         if (!in_dev)
1613                 return 0;
1614
1615         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1616 }
1617
1618 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1619 {
1620         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1621         struct nlattr *nla;
1622         int i;
1623
1624         if (!in_dev)
1625                 return -ENODATA;
1626
1627         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1628         if (nla == NULL)
1629                 return -EMSGSIZE;
1630
1631         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1632                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1633
1634         return 0;
1635 }
1636
1637 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1638         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1639 };
1640
1641 static int inet_validate_link_af(const struct net_device *dev,
1642                                  const struct nlattr *nla)
1643 {
1644         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1645         int err, rem;
1646
1647         if (dev && !__in_dev_get_rtnl(dev))
1648                 return -EAFNOSUPPORT;
1649
1650         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1651         if (err < 0)
1652                 return err;
1653
1654         if (tb[IFLA_INET_CONF]) {
1655                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1656                         int cfgid = nla_type(a);
1657
1658                         if (nla_len(a) < 4)
1659                                 return -EINVAL;
1660
1661                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1662                                 return -EINVAL;
1663                 }
1664         }
1665
1666         return 0;
1667 }
1668
1669 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1670 {
1671         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1672         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1673         int rem;
1674
1675         if (!in_dev)
1676                 return -EAFNOSUPPORT;
1677
1678         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1679                 BUG();
1680
1681         if (tb[IFLA_INET_CONF]) {
1682                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1683                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1684         }
1685
1686         return 0;
1687 }
1688
1689 static int inet_netconf_msgsize_devconf(int type)
1690 {
1691         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1692                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1693
1694         /* type -1 is used for ALL */
1695         if (type == -1 || type == NETCONFA_FORWARDING)
1696                 size += nla_total_size(4);
1697         if (type == -1 || type == NETCONFA_RP_FILTER)
1698                 size += nla_total_size(4);
1699         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1700                 size += nla_total_size(4);
1701         if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1702                 size += nla_total_size(4);
1703
1704         return size;
1705 }
1706
1707 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1708                                      struct ipv4_devconf *devconf, u32 portid,
1709                                      u32 seq, int event, unsigned int flags,
1710                                      int type)
1711 {
1712         struct nlmsghdr  *nlh;
1713         struct netconfmsg *ncm;
1714
1715         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1716                         flags);
1717         if (nlh == NULL)
1718                 return -EMSGSIZE;
1719
1720         ncm = nlmsg_data(nlh);
1721         ncm->ncm_family = AF_INET;
1722
1723         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1724                 goto nla_put_failure;
1725
1726         /* type -1 is used for ALL */
1727         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1728             nla_put_s32(skb, NETCONFA_FORWARDING,
1729                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1730                 goto nla_put_failure;
1731         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1732             nla_put_s32(skb, NETCONFA_RP_FILTER,
1733                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1734                 goto nla_put_failure;
1735         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1736             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1737                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1738                 goto nla_put_failure;
1739         if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1740             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1741                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1742                 goto nla_put_failure;
1743
1744         return nlmsg_end(skb, nlh);
1745
1746 nla_put_failure:
1747         nlmsg_cancel(skb, nlh);
1748         return -EMSGSIZE;
1749 }
1750
1751 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1752                                  struct ipv4_devconf *devconf)
1753 {
1754         struct sk_buff *skb;
1755         int err = -ENOBUFS;
1756
1757         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1758         if (skb == NULL)
1759                 goto errout;
1760
1761         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1762                                         RTM_NEWNETCONF, 0, type);
1763         if (err < 0) {
1764                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1765                 WARN_ON(err == -EMSGSIZE);
1766                 kfree_skb(skb);
1767                 goto errout;
1768         }
1769         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1770         return;
1771 errout:
1772         if (err < 0)
1773                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1774 }
1775
/*
 * Attribute policy used by inet_netconf_get_devconf() when parsing a netconf
 * request: each listed attribute is given a length of sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
        [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
        [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
        [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
        [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
};
1782
1783 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1784                                     struct nlmsghdr *nlh)
1785 {
1786         struct net *net = sock_net(in_skb->sk);
1787         struct nlattr *tb[NETCONFA_MAX+1];
1788         struct netconfmsg *ncm;
1789         struct sk_buff *skb;
1790         struct ipv4_devconf *devconf;
1791         struct in_device *in_dev;
1792         struct net_device *dev;
1793         int ifindex;
1794         int err;
1795
1796         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1797                           devconf_ipv4_policy);
1798         if (err < 0)
1799                 goto errout;
1800
1801         err = EINVAL;
1802         if (!tb[NETCONFA_IFINDEX])
1803                 goto errout;
1804
1805         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1806         switch (ifindex) {
1807         case NETCONFA_IFINDEX_ALL:
1808                 devconf = net->ipv4.devconf_all;
1809                 break;
1810         case NETCONFA_IFINDEX_DEFAULT:
1811                 devconf = net->ipv4.devconf_dflt;
1812                 break;
1813         default:
1814                 dev = __dev_get_by_index(net, ifindex);
1815                 if (dev == NULL)
1816                         goto errout;
1817                 in_dev = __in_dev_get_rtnl(dev);
1818                 if (in_dev == NULL)
1819                         goto errout;
1820                 devconf = &in_dev->cnf;
1821                 break;
1822         }
1823
1824         err = -ENOBUFS;
1825         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1826         if (skb == NULL)
1827                 goto errout;
1828
1829         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1830                                         NETLINK_CB(in_skb).portid,
1831                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1832                                         -1);
1833         if (err < 0) {
1834                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1835                 WARN_ON(err == -EMSGSIZE);
1836                 kfree_skb(skb);
1837                 goto errout;
1838         }
1839         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1840 errout:
1841         return err;
1842 }
1843
1844 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1845                                      struct netlink_callback *cb)
1846 {
1847         struct net *net = sock_net(skb->sk);
1848         int h, s_h;
1849         int idx, s_idx;
1850         struct net_device *dev;
1851         struct in_device *in_dev;
1852         struct hlist_head *head;
1853
1854         s_h = cb->args[0];
1855         s_idx = idx = cb->args[1];
1856
1857         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1858                 idx = 0;
1859                 head = &net->dev_index_head[h];
1860                 rcu_read_lock();
1861                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1862                           net->dev_base_seq;
1863                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1864                         if (idx < s_idx)
1865                                 goto cont;
1866                         in_dev = __in_dev_get_rcu(dev);
1867                         if (!in_dev)
1868                                 goto cont;
1869
1870                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1871                                                       &in_dev->cnf,
1872                                                       NETLINK_CB(cb->skb).portid,
1873                                                       cb->nlh->nlmsg_seq,
1874                                                       RTM_NEWNETCONF,
1875                                                       NLM_F_MULTI,
1876                                                       -1) <= 0) {
1877                                 rcu_read_unlock();
1878                                 goto done;
1879                         }
1880                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1881 cont:
1882                         idx++;
1883                 }
1884                 rcu_read_unlock();
1885         }
1886         if (h == NETDEV_HASHENTRIES) {
1887                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1888                                               net->ipv4.devconf_all,
1889                                               NETLINK_CB(cb->skb).portid,
1890                                               cb->nlh->nlmsg_seq,
1891                                               RTM_NEWNETCONF, NLM_F_MULTI,
1892                                               -1) <= 0)
1893                         goto done;
1894                 else
1895                         h++;
1896         }
1897         if (h == NETDEV_HASHENTRIES + 1) {
1898                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1899                                               net->ipv4.devconf_dflt,
1900                                               NETLINK_CB(cb->skb).portid,
1901                                               cb->nlh->nlmsg_seq,
1902                                               RTM_NEWNETCONF, NLM_F_MULTI,
1903                                               -1) <= 0)
1904                         goto done;
1905                 else
1906                         h++;
1907         }
1908 done:
1909         cb->args[0] = h;
1910         cb->args[1] = idx;
1911
1912         return skb->len;
1913 }
1914
1915 #ifdef CONFIG_SYSCTL
1916
1917 static void devinet_copy_dflt_conf(struct net *net, int i)
1918 {
1919         struct net_device *dev;
1920
1921         rcu_read_lock();
1922         for_each_netdev_rcu(net, dev) {
1923                 struct in_device *in_dev;
1924
1925                 in_dev = __in_dev_get_rcu(dev);
1926                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1927                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1928         }
1929         rcu_read_unlock();
1930 }
1931
1932 /* called with RTNL locked */
1933 static void inet_forward_change(struct net *net)
1934 {
1935         struct net_device *dev;
1936         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1937
1938         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1939         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1940         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1941                                     NETCONFA_IFINDEX_ALL,
1942                                     net->ipv4.devconf_all);
1943         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1944                                     NETCONFA_IFINDEX_DEFAULT,
1945                                     net->ipv4.devconf_dflt);
1946
1947         for_each_netdev(net, dev) {
1948                 struct in_device *in_dev;
1949                 if (on)
1950                         dev_disable_lro(dev);
1951                 rcu_read_lock();
1952                 in_dev = __in_dev_get_rcu(dev);
1953                 if (in_dev) {
1954                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1955                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1956                                                     dev->ifindex, &in_dev->cnf);
1957                 }
1958                 rcu_read_unlock();
1959         }
1960 }
1961
1962 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1963 {
1964         if (cnf == net->ipv4.devconf_dflt)
1965                 return NETCONFA_IFINDEX_DEFAULT;
1966         else if (cnf == net->ipv4.devconf_all)
1967                 return NETCONFA_IFINDEX_ALL;
1968         else {
1969                 struct in_device *idev
1970                         = container_of(cnf, struct in_device, cnf);
1971                 return idev->dev->ifindex;
1972         }
1973 }
1974
1975 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1976                              void __user *buffer,
1977                              size_t *lenp, loff_t *ppos)
1978 {
1979         int old_value = *(int *)ctl->data;
1980         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1981         int new_value = *(int *)ctl->data;
1982
1983         if (write) {
1984                 struct ipv4_devconf *cnf = ctl->extra1;
1985                 struct net *net = ctl->extra2;
1986                 int i = (int *)ctl->data - cnf->data;
1987                 int ifindex;
1988
1989                 set_bit(i, cnf->state);
1990
1991                 if (cnf == net->ipv4.devconf_dflt)
1992                         devinet_copy_dflt_conf(net, i);
1993                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1994                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1995                         if ((new_value == 0) && (old_value != 0))
1996                                 rt_cache_flush(net);
1997
1998                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1999                     new_value != old_value) {
2000                         ifindex = devinet_conf_ifindex(net, cnf);
2001                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2002                                                     ifindex, cnf);
2003                 }
2004                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2005                     new_value != old_value) {
2006                         ifindex = devinet_conf_ifindex(net, cnf);
2007                         inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2008                                                     ifindex, cnf);
2009                 }
2010         }
2011
2012         return ret;
2013 }
2014
2015 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2016                                   void __user *buffer,
2017                                   size_t *lenp, loff_t *ppos)
2018 {
2019         int *valp = ctl->data;
2020         int val = *valp;
2021         loff_t pos = *ppos;
2022         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2023
2024         if (write && *valp != val) {
2025                 struct net *net = ctl->extra2;
2026
2027                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2028                         if (!rtnl_trylock()) {
2029                                 /* Restore the original values before restarting */
2030                                 *valp = val;
2031                                 *ppos = pos;
2032                                 return restart_syscall();
2033                         }
2034                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2035                                 inet_forward_change(net);
2036                         } else {
2037                                 struct ipv4_devconf *cnf = ctl->extra1;
2038                                 struct in_device *idev =
2039                                         container_of(cnf, struct in_device, cnf);
2040                                 if (*valp)
2041                                         dev_disable_lro(idev->dev);
2042                                 inet_netconf_notify_devconf(net,
2043                                                             NETCONFA_FORWARDING,
2044                                                             idev->dev->ifindex,
2045                                                             cnf);
2046                         }
2047                         rtnl_unlock();
2048                         rt_cache_flush(net);
2049                 } else
2050                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2051                                                     NETCONFA_IFINDEX_DEFAULT,
2052                                                     net->ipv4.devconf_dflt);
2053         }
2054
2055         return ret;
2056 }
2057
2058 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2059                                 void __user *buffer,
2060                                 size_t *lenp, loff_t *ppos)
2061 {
2062         int *valp = ctl->data;
2063         int val = *valp;
2064         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2065         struct net *net = ctl->extra2;
2066
2067         if (write && *valp != val)
2068                 rt_cache_flush(net);
2069
2070         return ret;
2071 }
2072
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 *
 * .data points at slot IPV4_DEVCONF_<attr> - 1 of the static
 * ipv4_devconf.data array and .extra1 at ipv4_devconf itself;
 * __devinet_sysctl_register() later rebases both onto the devconf actually
 * being registered.  @name is the procname, @mval the mode and @proc the
 * handler.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  IPV4_DEVCONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler @proc. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2095
/*
 * Template sysctl table for one devconf instance.
 *
 * __devinet_sysctl_register() duplicates this structure for every devconf it
 * registers and stores the resulting registration handle in sysctl_header.
 * The entries below initially reference the static ipv4_devconf (see
 * DEVINET_SYSCTL_ENTRY).
 */
static struct devinet_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
        .devinet_vars = {
                /* forwarding has its own handler; mc_forwarding is read-only */
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward),
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

                /* plain read-write integers handled by devinet_conf_proc() */
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
                                        "accept_source_route"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
                DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
                                        "force_igmp_version"),
                DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
                                        "igmpv2_unsolicited_report_interval"),
                DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
                                        "igmpv3_unsolicited_report_interval"),

                /* entries whose change also flushes the route cache */
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
                                              "promote_secondaries"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
                                              "route_localnet"),
        },
};
2140
2141 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2142                                         struct ipv4_devconf *p)
2143 {
2144         int i;
2145         struct devinet_sysctl_table *t;
2146         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2147
2148         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2149         if (!t)
2150                 goto out;
2151
2152         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2153                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2154                 t->devinet_vars[i].extra1 = p;
2155                 t->devinet_vars[i].extra2 = net;
2156         }
2157
2158         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2159
2160         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2161         if (!t->sysctl_header)
2162                 goto free;
2163
2164         p->sysctl = t;
2165         return 0;
2166
2167 free:
2168         kfree(t);
2169 out:
2170         return -ENOBUFS;
2171 }
2172
2173 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2174 {
2175         struct devinet_sysctl_table *t = cnf->sysctl;
2176
2177         if (t == NULL)
2178                 return;
2179
2180         cnf->sysctl = NULL;
2181         unregister_net_sysctl_table(t->sysctl_header);
2182         kfree(t);
2183 }
2184
2185 static void devinet_sysctl_register(struct in_device *idev)
2186 {
2187         neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2188         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2189                                         &idev->cnf);
2190 }
2191
2192 static void devinet_sysctl_unregister(struct in_device *idev)
2193 {
2194         __devinet_sysctl_unregister(&idev->cnf);
2195         neigh_sysctl_unregister(idev->arp_parms);
2196 }
2197
/*
 * Table holding the single "ip_forward" sysctl, which is handled by
 * devinet_sysctl_forward() and backed by the FORWARDING slot of the static
 * ipv4_devconf.  devinet_init_net() registers it under "net/ipv4" and, for
 * namespaces other than init_net, uses a copy re-pointed at that
 * namespace's own devconf.
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .procname       = "ip_forward",
                .data           = &ipv4_devconf.data[
                                        IPV4_DEVCONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
        { },
};
2211 #endif
2212
/*
 * Per-namespace initialisation of the IPv4 device configuration.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt (and the static
 * ctl_forward_entry table) directly; every other namespace gets its own
 * kmemdup()'d copies.  With CONFIG_SYSCTL the "all" and "default" devconf
 * tables and the "ip_forward" entry are then registered.
 *
 * Returns 0 on success.  On failure everything set up so far is undone in
 * reverse order and a negative error code is returned (-ENOMEM for
 * allocation or "net/ipv4" registration failures, otherwise the error from
 * __devinet_sysctl_register()).
 */
static __net_init int devinet_init_net(struct net *net)
{
        int err;
        struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
        struct ctl_table *tbl = ctl_forward_entry;
        struct ctl_table_header *forw_hdr;
#endif

        err = -ENOMEM;
        all = &ipv4_devconf;
        dflt = &ipv4_devconf_dflt;

        /* Namespaces other than init_net work on private copies. */
        if (!net_eq(net, &init_net)) {
                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
                if (all == NULL)
                        goto err_alloc_all;

                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
                if (dflt == NULL)
                        goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
                if (tbl == NULL)
                        goto err_alloc_ctl;

                /* Point the copied "ip_forward" entry at this namespace. */
                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
                tbl[0].extra1 = all;
                tbl[0].extra2 = net;
#endif
        }

#ifdef CONFIG_SYSCTL
        err = __devinet_sysctl_register(net, "all", all);
        if (err < 0)
                goto err_reg_all;

        err = __devinet_sysctl_register(net, "default", dflt);
        if (err < 0)
                goto err_reg_dflt;

        err = -ENOMEM;
        forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
        if (forw_hdr == NULL)
                goto err_reg_ctl;
        net->ipv4.forw_hdr = forw_hdr;
#endif

        net->ipv4.devconf_all = all;
        net->ipv4.devconf_dflt = dflt;
        return 0;

        /*
         * Error unwinding: each label undoes the step that succeeded just
         * before the failing one.  The static init_net objects are never
         * freed.
         */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
        __devinet_sysctl_unregister(dflt);
err_reg_dflt:
        __devinet_sysctl_unregister(all);
err_reg_all:
        if (tbl != ctl_forward_entry)
                kfree(tbl);
err_alloc_ctl:
#endif
        if (dflt != &ipv4_devconf_dflt)
                kfree(dflt);
err_alloc_dflt:
        if (all != &ipv4_devconf)
                kfree(all);
err_alloc_all:
        return err;
}
2284
2285 static __net_exit void devinet_exit_net(struct net *net)
2286 {
2287 #ifdef CONFIG_SYSCTL
2288         struct ctl_table *tbl;
2289
2290         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2291         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2292         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2293         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2294         kfree(tbl);
2295 #endif
2296         kfree(net->ipv4.devconf_dflt);
2297         kfree(net->ipv4.devconf_all);
2298 }
2299
/*
 * Per-namespace hooks, registered from devinet_init() with
 * register_pernet_subsys().
 */
static __net_initdata struct pernet_operations devinet_ops = {
        .init = devinet_init_net,
        .exit = devinet_exit_net,
};
2304
/*
 * AF_INET link-attribute callbacks (fill, size, validate, set), registered
 * from devinet_init() with rtnl_af_register().
 */
static struct rtnl_af_ops inet_af_ops = {
        .family           = AF_INET,
        .fill_link_af     = inet_fill_link_af,
        .get_link_af_size = inet_get_link_af_size,
        .validate_link_af = inet_validate_link_af,
        .set_link_af      = inet_set_link_af,
};
2312
/*
 * Boot-time initialisation of the IPv4 device layer: set up the address
 * hash table, register the per-namespace operations, the gifconf handler
 * and the netdevice notifier, start the address lifetime work, and register
 * the rtnetlink AF ops and message handlers.  Return values of the
 * registration calls are not checked here.
 */
void __init devinet_init(void)
{
        int i;

        /* Initialise every bucket of the inet_addr_lst hash table. */
        for (i = 0; i < IN4_ADDR_HSIZE; i++)
                INIT_HLIST_HEAD(&inet_addr_lst[i]);

        register_pernet_subsys(&devinet_ops);

        register_gifconf(PF_INET, inet_gifconf);
        register_netdevice_notifier(&ip_netdev_notifier);

        /* Queue check_lifetime_work with a delay of 0. */
        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

        rtnl_af_register(&inet_af_ops);

        /*
         * RTM_NEWADDR / RTM_DELADDR get only a handler in the first slot,
         * RTM_GETADDR only one in the second (inet_dump_ifaddr), and
         * RTM_GETNETCONF gets both.
         */
        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
        rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
                      inet_netconf_dump_devconf, NULL);
}
2335