[XFRM]: Purge dst references to deleted SAs passively.
net/xfrm/xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/bootmem.h>
22 #include <linux/vmalloc.h>
23 #include <linux/cache.h>
24 #include <asm/uaccess.h>
25
26 struct sock *xfrm_nl;
27 EXPORT_SYMBOL(xfrm_nl);
28
29 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
31
32 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
33 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
34
35 /* Each xfrm_state may be linked to three tables:
36
37    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
38    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
39       destination/tunnel endpoint. (output)
40    3. Hash table by (saddr,family) to find SAs by source address. */
41
42 static DEFINE_SPINLOCK(xfrm_state_lock);
43
44 /* Hash table to find appropriate SA towards given target (endpoint
45  * of tunnel or destination of transport mode) allowed by selector.
46  *
47  * Main use is finding SA after policy selected tunnel or transport mode.
48  * Also, it can be used by ah/esp icmp error handler to find offending SA.
49  */
50 static struct hlist_head *xfrm_state_bydst __read_mostly;
51 static struct hlist_head *xfrm_state_bysrc __read_mostly;
52 static struct hlist_head *xfrm_state_byspi __read_mostly;
53 static unsigned int xfrm_state_hmask __read_mostly;
54 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
55 static unsigned int xfrm_state_num;
56 static unsigned int xfrm_state_genid;
57
58 static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
59 {
60         return ntohl(addr->a4);
61 }
62
63 static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
64 {
65         return ntohl(addr->a6[2]^addr->a6[3]);
66 }
67
68 static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr,
69                                            u32 reqid, unsigned short family,
70                                            unsigned int hmask)
71 {
72         unsigned int h = family ^ reqid;
73         switch (family) {
74         case AF_INET:
75                 h ^= __xfrm4_addr_hash(addr);
76                 break;
77         case AF_INET6:
78                 h ^= __xfrm6_addr_hash(addr);
79                 break;
80         }
81         return (h ^ (h >> 16)) & hmask;
82 }
83
84 static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, u32 reqid,
85                                          unsigned short family)
86 {
87         return __xfrm_dst_hash(addr, reqid, family, xfrm_state_hmask);
88 }
89
90 static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,
91                                        unsigned int hmask)
92 {
93         unsigned int h = family;
94         switch (family) {
95         case AF_INET:
96                 h ^= __xfrm4_addr_hash(addr);
97                 break;
98         case AF_INET6:
99                 h ^= __xfrm6_addr_hash(addr);
100                 break;
101         }
102         return (h ^ (h >> 16)) & hmask;
103 }
104
105 static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
106 {
107         return __xfrm_src_hash(addr, family, xfrm_state_hmask);
108 }
109
110 static inline unsigned int
111 __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
112                 unsigned int hmask)
113 {
114         unsigned int h = spi ^ proto;
115         switch (family) {
116         case AF_INET:
117                 h ^= __xfrm4_addr_hash(addr);
118                 break;
119         case AF_INET6:
120                 h ^= __xfrm6_addr_hash(addr);
121                 break;
122         }
123         return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
124 }
125
126 static inline unsigned int
127 xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
128 {
129         return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
130 }
131
132 static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
133 {
134         struct hlist_head *n;
135
136         if (sz <= PAGE_SIZE)
137                 n = kmalloc(sz, GFP_KERNEL);
138         else if (hashdist)
139                 n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
140         else
141                 n = (struct hlist_head *)
142                         __get_free_pages(GFP_KERNEL, get_order(sz));
143
144         if (n)
145                 memset(n, 0, sz);
146
147         return n;
148 }
149
150 static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
151 {
152         if (sz <= PAGE_SIZE)
153                 kfree(n);
154         else if (hashdist)
155                 vfree(n);
156         else
157                 free_pages((unsigned long)n, get_order(sz));
158 }
159
160 static void xfrm_hash_transfer(struct hlist_head *list,
161                                struct hlist_head *ndsttable,
162                                struct hlist_head *nsrctable,
163                                struct hlist_head *nspitable,
164                                unsigned int nhashmask)
165 {
166         struct hlist_node *entry, *tmp;
167         struct xfrm_state *x;
168
169         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
170                 unsigned int h;
171
172                 h = __xfrm_dst_hash(&x->id.daddr, x->props.reqid,
173                                     x->props.family, nhashmask);
174                 hlist_add_head(&x->bydst, ndsttable+h);
175
176                 h = __xfrm_src_hash(&x->props.saddr, x->props.family,
177                                     nhashmask);
178                 hlist_add_head(&x->bysrc, nsrctable+h);
179
180                 h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
181                                     x->props.family, nhashmask);
182                 hlist_add_head(&x->byspi, nspitable+h);
183         }
184 }
185
186 static unsigned long xfrm_hash_new_size(void)
187 {
188         return ((xfrm_state_hmask + 1) << 1) *
189                 sizeof(struct hlist_head);
190 }
191
192 static DEFINE_MUTEX(hash_resize_mutex);
193
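/* Grow all three state hash tables to twice their current size.
 * The new tables are allocated first; every state is then rehashed
 * into them under xfrm_state_lock, the table pointers and hash mask
 * are swapped, and the old tables are freed once the lock is dropped.
 * Resizes are serialized by hash_resize_mutex and are scheduled from
 * __xfrm_state_insert() via the xfrm_hash_work work item.
 */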
194 static void xfrm_hash_resize(void *__unused)
195 {
196         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
197         unsigned long nsize, osize;
198         unsigned int nhashmask, ohashmask;
199         int i;
200
201         mutex_lock(&hash_resize_mutex);
202
203         nsize = xfrm_hash_new_size();
204         ndst = xfrm_state_hash_alloc(nsize);
205         if (!ndst)
206                 goto out_unlock;
207         nsrc = xfrm_state_hash_alloc(nsize);
208         if (!nsrc) {
209                 xfrm_state_hash_free(ndst, nsize);
210                 goto out_unlock;
211         }
212         nspi = xfrm_state_hash_alloc(nsize);
213         if (!nspi) {
214                 xfrm_state_hash_free(ndst, nsize);
215                 xfrm_state_hash_free(nsrc, nsize);
216                 goto out_unlock;
217         }
218
219         spin_lock_bh(&xfrm_state_lock);
220
221         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
222         for (i = xfrm_state_hmask; i >= 0; i--)
223                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
224                                    nhashmask);
225
226         odst = xfrm_state_bydst;
227         osrc = xfrm_state_bysrc;
228         ospi = xfrm_state_byspi;
229         ohashmask = xfrm_state_hmask;
230
231         xfrm_state_bydst = ndst;
232         xfrm_state_bysrc = nsrc;
233         xfrm_state_byspi = nspi;
234         xfrm_state_hmask = nhashmask;
235
236         spin_unlock_bh(&xfrm_state_lock);
237
238         osize = (ohashmask + 1) * sizeof(struct hlist_head);
239         xfrm_state_hash_free(odst, osize);
240         xfrm_state_hash_free(osrc, osize);
241         xfrm_state_hash_free(ospi, osize);
242
243 out_unlock:
244         mutex_unlock(&hash_resize_mutex);
245 }
246
247 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
248
249 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
250 EXPORT_SYMBOL(km_waitq);
251
252 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
253 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
254
255 static struct work_struct xfrm_state_gc_work;
256 static HLIST_HEAD(xfrm_state_gc_list);
257 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
258
259 int __xfrm_state_delete(struct xfrm_state *x);
260
261 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
262 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
263
264 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
265 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
266
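/* State destruction is deferred: __xfrm_state_destroy() queues dead
 * states on xfrm_state_gc_list and schedules xfrm_state_gc_work, and
 * xfrm_state_gc_task() later frees the queued states (timers, algorithm
 * data, type and mode references) and wakes km_waitq.
 */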
267 static void xfrm_state_gc_destroy(struct xfrm_state *x)
268 {
269         if (del_timer(&x->timer))
270                 BUG();
271         if (del_timer(&x->rtimer))
272                 BUG();
273         kfree(x->aalg);
274         kfree(x->ealg);
275         kfree(x->calg);
276         kfree(x->encap);
277         kfree(x->coaddr);
278         if (x->mode)
279                 xfrm_put_mode(x->mode);
280         if (x->type) {
281                 x->type->destructor(x);
282                 xfrm_put_type(x->type);
283         }
284         security_xfrm_state_free(x);
285         kfree(x);
286 }
287
288 static void xfrm_state_gc_task(void *data)
289 {
290         struct xfrm_state *x;
291         struct hlist_node *entry, *tmp;
292         struct hlist_head gc_list;
293
294         spin_lock_bh(&xfrm_state_gc_lock);
295         gc_list.first = xfrm_state_gc_list.first;
296         INIT_HLIST_HEAD(&xfrm_state_gc_list);
297         spin_unlock_bh(&xfrm_state_gc_lock);
298
299         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
300                 xfrm_state_gc_destroy(x);
301
302         wake_up(&km_waitq);
303 }
304
305 static inline unsigned long make_jiffies(long secs)
306 {
307         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
308                 return MAX_SCHEDULE_TIMEOUT-1;
309         else
310                 return secs*HZ;
311 }
312
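/* Per-state lifetime timer.  Hard add/use expiry deletes the state
 * (larval ACQ states without an SPI are first marked EXPIRED and the
 * timer is re-armed briefly); soft expiry marks the state as dying and
 * notifies the key managers via km_state_expired().  Otherwise the
 * timer is rescheduled for the next pending expiry.
 */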
313 static void xfrm_timer_handler(unsigned long data)
314 {
315         struct xfrm_state *x = (struct xfrm_state*)data;
316         unsigned long now = (unsigned long)xtime.tv_sec;
317         long next = LONG_MAX;
318         int warn = 0;
319
320         spin_lock(&x->lock);
321         if (x->km.state == XFRM_STATE_DEAD)
322                 goto out;
323         if (x->km.state == XFRM_STATE_EXPIRED)
324                 goto expired;
325         if (x->lft.hard_add_expires_seconds) {
326                 long tmo = x->lft.hard_add_expires_seconds +
327                         x->curlft.add_time - now;
328                 if (tmo <= 0)
329                         goto expired;
330                 if (tmo < next)
331                         next = tmo;
332         }
333         if (x->lft.hard_use_expires_seconds) {
334                 long tmo = x->lft.hard_use_expires_seconds +
335                         (x->curlft.use_time ? : now) - now;
336                 if (tmo <= 0)
337                         goto expired;
338                 if (tmo < next)
339                         next = tmo;
340         }
341         if (x->km.dying)
342                 goto resched;
343         if (x->lft.soft_add_expires_seconds) {
344                 long tmo = x->lft.soft_add_expires_seconds +
345                         x->curlft.add_time - now;
346                 if (tmo <= 0)
347                         warn = 1;
348                 else if (tmo < next)
349                         next = tmo;
350         }
351         if (x->lft.soft_use_expires_seconds) {
352                 long tmo = x->lft.soft_use_expires_seconds +
353                         (x->curlft.use_time ? : now) - now;
354                 if (tmo <= 0)
355                         warn = 1;
356                 else if (tmo < next)
357                         next = tmo;
358         }
359
360         x->km.dying = warn;
361         if (warn)
362                 km_state_expired(x, 0, 0);
363 resched:
364         if (next != LONG_MAX &&
365             !mod_timer(&x->timer, jiffies + make_jiffies(next)))
366                 xfrm_state_hold(x);
367         goto out;
368
369 expired:
370         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
371                 x->km.state = XFRM_STATE_EXPIRED;
372                 wake_up(&km_waitq);
373                 next = 2;
374                 goto resched;
375         }
376         if (!__xfrm_state_delete(x) && x->id.spi)
377                 km_state_expired(x, 1, 0);
378
379 out:
380         spin_unlock(&x->lock);
381         xfrm_state_put(x);
382 }
383
384 static void xfrm_replay_timer_handler(unsigned long data);
385
386 struct xfrm_state *xfrm_state_alloc(void)
387 {
388         struct xfrm_state *x;
389
390         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
391
392         if (x) {
393                 atomic_set(&x->refcnt, 1);
394                 atomic_set(&x->tunnel_users, 0);
395                 INIT_HLIST_NODE(&x->bydst);
396                 INIT_HLIST_NODE(&x->bysrc);
397                 INIT_HLIST_NODE(&x->byspi);
398                 init_timer(&x->timer);
399                 x->timer.function = xfrm_timer_handler;
400                 x->timer.data     = (unsigned long)x;
401                 init_timer(&x->rtimer);
402                 x->rtimer.function = xfrm_replay_timer_handler;
403                 x->rtimer.data     = (unsigned long)x;
404                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
405                 x->lft.soft_byte_limit = XFRM_INF;
406                 x->lft.soft_packet_limit = XFRM_INF;
407                 x->lft.hard_byte_limit = XFRM_INF;
408                 x->lft.hard_packet_limit = XFRM_INF;
409                 x->replay_maxage = 0;
410                 x->replay_maxdiff = 0;
411                 spin_lock_init(&x->lock);
412         }
413         return x;
414 }
415 EXPORT_SYMBOL(xfrm_state_alloc);
416
417 void __xfrm_state_destroy(struct xfrm_state *x)
418 {
419         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
420
421         spin_lock_bh(&xfrm_state_gc_lock);
422         hlist_add_head(&x->bydst, &xfrm_state_gc_list);
423         spin_unlock_bh(&xfrm_state_gc_lock);
424         schedule_work(&xfrm_state_gc_work);
425 }
426 EXPORT_SYMBOL(__xfrm_state_destroy);
427
428 int __xfrm_state_delete(struct xfrm_state *x)
429 {
430         int err = -ESRCH;
431
432         if (x->km.state != XFRM_STATE_DEAD) {
433                 x->km.state = XFRM_STATE_DEAD;
434                 spin_lock(&xfrm_state_lock);
435                 hlist_del(&x->bydst);
436                 __xfrm_state_put(x);
437                 hlist_del(&x->bysrc);
438                 __xfrm_state_put(x);
439                 if (x->id.spi) {
440                         hlist_del(&x->byspi);
441                         __xfrm_state_put(x);
442                 }
443                 xfrm_state_num--;
444                 spin_unlock(&xfrm_state_lock);
445                 if (del_timer(&x->timer))
446                         __xfrm_state_put(x);
447                 if (del_timer(&x->rtimer))
448                         __xfrm_state_put(x);
449
450                 /* All xfrm_state objects are created by xfrm_state_alloc.
451                  * The xfrm_state_alloc call gives a reference, and that
452                  * is what we are dropping here.
453                  */
454                 __xfrm_state_put(x);
455                 err = 0;
456         }
457
458         return err;
459 }
460 EXPORT_SYMBOL(__xfrm_state_delete);
461
462 int xfrm_state_delete(struct xfrm_state *x)
463 {
464         int err;
465
466         spin_lock_bh(&x->lock);
467         err = __xfrm_state_delete(x);
468         spin_unlock_bh(&x->lock);
469
470         return err;
471 }
472 EXPORT_SYMBOL(xfrm_state_delete);
473
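/* Delete every state matching 'proto' that is not held by the kernel.
 * xfrm_state_lock is dropped around each deletion, so the current hash
 * bucket is rescanned from its head after every deleted entry.
 */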
474 void xfrm_state_flush(u8 proto)
475 {
476         int i;
477
478         spin_lock_bh(&xfrm_state_lock);
479         for (i = 0; i <= xfrm_state_hmask; i++) {
480                 struct hlist_node *entry;
481                 struct xfrm_state *x;
482 restart:
483                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
484                         if (!xfrm_state_kern(x) &&
485                             xfrm_id_proto_match(x->id.proto, proto)) {
486                                 xfrm_state_hold(x);
487                                 spin_unlock_bh(&xfrm_state_lock);
488
489                                 xfrm_state_delete(x);
490                                 xfrm_state_put(x);
491
492                                 spin_lock_bh(&xfrm_state_lock);
493                                 goto restart;
494                         }
495                 }
496         }
497         spin_unlock_bh(&xfrm_state_lock);
498         wake_up(&km_waitq);
499 }
500 EXPORT_SYMBOL(xfrm_state_flush);
501
502 static int
503 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
504                   struct xfrm_tmpl *tmpl,
505                   xfrm_address_t *daddr, xfrm_address_t *saddr,
506                   unsigned short family)
507 {
508         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
509         if (!afinfo)
510                 return -1;
511         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
512         xfrm_state_put_afinfo(afinfo);
513         return 0;
514 }
515
516 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
517 {
518         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
519         struct xfrm_state *x;
520         struct hlist_node *entry;
521
522         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
523                 if (x->props.family != family ||
524                     x->id.spi       != spi ||
525                     x->id.proto     != proto)
526                         continue;
527
528                 switch (family) {
529                 case AF_INET:
530                         if (x->id.daddr.a4 != daddr->a4)
531                                 continue;
532                         break;
533                 case AF_INET6:
534                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
535                                              (struct in6_addr *)
536                                              x->id.daddr.a6))
537                                 continue;
538                         break;
539         }
540
541                 xfrm_state_hold(x);
542                 return x;
543         }
544
545         return NULL;
546 }
547
548 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
549 {
550         unsigned int h = xfrm_src_hash(saddr, family);
551         struct xfrm_state *x;
552         struct hlist_node *entry;
553
554         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
555                 if (x->props.family != family ||
556                     x->id.proto     != proto)
557                         continue;
558
559                 switch (family) {
560                 case AF_INET:
561                         if (x->id.daddr.a4 != daddr->a4 ||
562                             x->props.saddr.a4 != saddr->a4)
563                                 continue;
564                         break;
565                 case AF_INET6:
566                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
567                                              (struct in6_addr *)
568                                              x->id.daddr.a6) ||
569                             !ipv6_addr_equal((struct in6_addr *)saddr,
570                                              (struct in6_addr *)
571                                              x->props.saddr.a6))
572                                 continue;
573                         break;
574         }
575
576                 xfrm_state_hold(x);
577                 return x;
578         }
579
580         return NULL;
581 }
582
583 static inline struct xfrm_state *
584 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
585 {
586         if (use_spi)
587                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
588                                            x->id.proto, family);
589         else
590                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
591                                                   &x->props.saddr,
592                                                   x->id.proto, family);
593 }
594
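/* Resolve an SA for an outgoing flow.  The bydst bucket for
 * (daddr, reqid) is searched for the best VALID state whose selector
 * and security context match the flow; if nothing is found and no
 * acquire is already in progress, a larval XFRM_STATE_ACQ state is
 * created, hashed and handed to the key managers via km_query().
 * The returned state carries a reference; on failure NULL is returned
 * and *err is set (-EAGAIN while an acquire is pending).
 */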
595 struct xfrm_state *
596 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
597                 struct flowi *fl, struct xfrm_tmpl *tmpl,
598                 struct xfrm_policy *pol, int *err,
599                 unsigned short family)
600 {
601         unsigned int h = xfrm_dst_hash(daddr, tmpl->reqid, family);
602         struct hlist_node *entry;
603         struct xfrm_state *x, *x0;
604         int acquire_in_progress = 0;
605         int error = 0;
606         struct xfrm_state *best = NULL;
607         
608         spin_lock_bh(&xfrm_state_lock);
609         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
610                 if (x->props.family == family &&
611                     x->props.reqid == tmpl->reqid &&
612                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
613                     xfrm_state_addr_check(x, daddr, saddr, family) &&
614                     tmpl->mode == x->props.mode &&
615                     tmpl->id.proto == x->id.proto &&
616                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
617                         /* Resolution logic:
618                            1. There is a valid state with matching selector.
619                               Done.
620                            2. Valid state with inappropriate selector. Skip.
621
622                            Entering area of "sysdeps".
623
624                            3. If the state is not valid, its selector is
625                               temporary and matches only the session which
626                               triggered the previous resolution.  The key
627                               manager will install a state with a proper
628                               selector.
629                          */
630                         if (x->km.state == XFRM_STATE_VALID) {
631                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
632                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
633                                         continue;
634                                 if (!best ||
635                                     best->km.dying > x->km.dying ||
636                                     (best->km.dying == x->km.dying &&
637                                      best->curlft.add_time < x->curlft.add_time))
638                                         best = x;
639                         } else if (x->km.state == XFRM_STATE_ACQ) {
640                                 acquire_in_progress = 1;
641                         } else if (x->km.state == XFRM_STATE_ERROR ||
642                                    x->km.state == XFRM_STATE_EXPIRED) {
643                                 if (xfrm_selector_match(&x->sel, fl, family) &&
644                                     security_xfrm_state_pol_flow_match(x, pol, fl))
645                                         error = -ESRCH;
646                         }
647                 }
648         }
649
650         x = best;
651         if (!x && !error && !acquire_in_progress) {
652                 if (tmpl->id.spi &&
653                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
654                                               tmpl->id.proto, family)) != NULL) {
655                         xfrm_state_put(x0);
656                         error = -EEXIST;
657                         goto out;
658                 }
659                 x = xfrm_state_alloc();
660                 if (x == NULL) {
661                         error = -ENOMEM;
662                         goto out;
663                 }
664                 /* Initialize temporary selector matching only
665                  * to current session. */
666                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
667
668                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
669                 if (error) {
670                         x->km.state = XFRM_STATE_DEAD;
671                         xfrm_state_put(x);
672                         x = NULL;
673                         goto out;
674                 }
675
676                 if (km_query(x, tmpl, pol) == 0) {
677                         x->km.state = XFRM_STATE_ACQ;
678                         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
679                         xfrm_state_hold(x);
680                         h = xfrm_src_hash(saddr, family);
681                         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
682                         xfrm_state_hold(x);
683                         if (x->id.spi) {
684                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
685                                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
686                                 xfrm_state_hold(x);
687                         }
688                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
689                         xfrm_state_hold(x);
690                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
691                         add_timer(&x->timer);
692                 } else {
693                         x->km.state = XFRM_STATE_DEAD;
694                         xfrm_state_put(x);
695                         x = NULL;
696                         error = -ESRCH;
697                 }
698         }
699 out:
700         if (x)
701                 xfrm_state_hold(x);
702         else
703                 *err = acquire_in_progress ? -EAGAIN : error;
704         spin_unlock_bh(&xfrm_state_lock);
705         return x;
706 }
707
708 static void __xfrm_state_insert(struct xfrm_state *x)
709 {
710         unsigned int h;
711
712         x->genid = ++xfrm_state_genid;
713
714         h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family);
715         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
716         xfrm_state_hold(x);
717
718         h = xfrm_src_hash(&x->props.saddr, x->props.family);
719         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
720         xfrm_state_hold(x);
721
722         if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
723                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
724                                   x->props.family);
725
726                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
727                 xfrm_state_hold(x);
728         }
729
730         if (!mod_timer(&x->timer, jiffies + HZ))
731                 xfrm_state_hold(x);
732
733         if (x->replay_maxage &&
734             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
735                 xfrm_state_hold(x);
736
737         wake_up(&km_waitq);
738
739         xfrm_state_num++;
740
741         if (x->bydst.next != NULL &&
742             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
743             xfrm_state_num > xfrm_state_hmask)
744                 schedule_work(&xfrm_hash_work);
745 }
746
747 /* xfrm_state_lock is held */
748 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
749 {
750         unsigned short family = xnew->props.family;
751         u32 reqid = xnew->props.reqid;
752         struct xfrm_state *x;
753         struct hlist_node *entry;
754         unsigned int h;
755
756         h = xfrm_dst_hash(&xnew->id.daddr, reqid, family);
757         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
758                 if (x->props.family     == family &&
759                     x->props.reqid      == reqid &&
760                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family))
761                         x->genid = xfrm_state_genid;
762         }
763 }
764
765 void xfrm_state_insert(struct xfrm_state *x)
766 {
767         spin_lock_bh(&xfrm_state_lock);
768         __xfrm_state_bump_genids(x);
769         __xfrm_state_insert(x);
770         spin_unlock_bh(&xfrm_state_lock);
771 }
772 EXPORT_SYMBOL(xfrm_state_insert);
773
774 /* xfrm_state_lock is held */
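/* Find a larval (XFRM_STATE_ACQ, zero SPI) state matching
 * (mode, reqid, family, daddr, saddr).  If none exists and 'create' is
 * set, allocate one with an XFRM_ACQ_EXPIRES hard lifetime, link it
 * into the bydst and bysrc hashes and wake km_waitq.
 */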
775 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
776 {
777         unsigned int h = xfrm_dst_hash(daddr, reqid, family);
778         struct hlist_node *entry;
779         struct xfrm_state *x;
780
781         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
782                 if (x->props.reqid  != reqid ||
783                     x->props.mode   != mode ||
784                     x->props.family != family ||
785                     x->km.state     != XFRM_STATE_ACQ ||
786                     x->id.spi       != 0)
787                         continue;
788
789                 switch (family) {
790                 case AF_INET:
791                         if (x->id.daddr.a4    != daddr->a4 ||
792                             x->props.saddr.a4 != saddr->a4)
793                                 continue;
794                         break;
795                 case AF_INET6:
796                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
797                                              (struct in6_addr *)daddr) ||
798                             !ipv6_addr_equal((struct in6_addr *)
799                                              x->props.saddr.a6,
800                                              (struct in6_addr *)saddr))
801                                 continue;
802                         break;
803         }
804
805                 xfrm_state_hold(x);
806                 return x;
807         }
808
809         if (!create)
810                 return NULL;
811
812         x = xfrm_state_alloc();
813         if (likely(x)) {
814                 switch (family) {
815                 case AF_INET:
816                         x->sel.daddr.a4 = daddr->a4;
817                         x->sel.saddr.a4 = saddr->a4;
818                         x->sel.prefixlen_d = 32;
819                         x->sel.prefixlen_s = 32;
820                         x->props.saddr.a4 = saddr->a4;
821                         x->id.daddr.a4 = daddr->a4;
822                         break;
823
824                 case AF_INET6:
825                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
826                                        (struct in6_addr *)daddr);
827                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
828                                        (struct in6_addr *)saddr);
829                         x->sel.prefixlen_d = 128;
830                         x->sel.prefixlen_s = 128;
831                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
832                                        (struct in6_addr *)saddr);
833                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
834                                        (struct in6_addr *)daddr);
835                         break;
836         }
837
838                 x->km.state = XFRM_STATE_ACQ;
839                 x->id.proto = proto;
840                 x->props.family = family;
841                 x->props.mode = mode;
842                 x->props.reqid = reqid;
843                 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
844                 xfrm_state_hold(x);
845                 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
846                 add_timer(&x->timer);
847                 xfrm_state_hold(x);
848                 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
849                 h = xfrm_src_hash(saddr, family);
850                 xfrm_state_hold(x);
851                 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
852                 wake_up(&km_waitq);
853         }
854
855         return x;
856 }
857
858 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
859
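/* Insert a fully configured state.  Fails with -EEXIST if an identical
 * state (same SPI/addresses, or same addresses for SPI-less protocols)
 * is already hashed.  Otherwise any larval ACQ state for the same
 * request (located by km.seq or by reqid/mode/addresses) is deleted
 * once the new state has been inserted.
 */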
860 int xfrm_state_add(struct xfrm_state *x)
861 {
862         struct xfrm_state *x1;
863         int family;
864         int err;
865         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
866
867         family = x->props.family;
868
869         spin_lock_bh(&xfrm_state_lock);
870
871         x1 = __xfrm_state_locate(x, use_spi, family);
872         if (x1) {
873                 xfrm_state_put(x1);
874                 x1 = NULL;
875                 err = -EEXIST;
876                 goto out;
877         }
878
879         if (use_spi && x->km.seq) {
880                 x1 = __xfrm_find_acq_byseq(x->km.seq);
881                 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
882                         xfrm_state_put(x1);
883                         x1 = NULL;
884                 }
885         }
886
887         if (use_spi && !x1)
888                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
889                                      x->id.proto,
890                                      &x->id.daddr, &x->props.saddr, 0);
891
892         __xfrm_state_bump_genids(x);
893         __xfrm_state_insert(x);
894         err = 0;
895
896 out:
897         spin_unlock_bh(&xfrm_state_lock);
898
899         if (x1) {
900                 xfrm_state_delete(x1);
901                 xfrm_state_put(x1);
902         }
903
904         return err;
905 }
906 EXPORT_SYMBOL(xfrm_state_add);
907
908 int xfrm_state_update(struct xfrm_state *x)
909 {
910         struct xfrm_state *x1;
911         int err;
912         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
913
914         spin_lock_bh(&xfrm_state_lock);
915         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
916
917         err = -ESRCH;
918         if (!x1)
919                 goto out;
920
921         if (xfrm_state_kern(x1)) {
922                 xfrm_state_put(x1);
923                 err = -EEXIST;
924                 goto out;
925         }
926
927         if (x1->km.state == XFRM_STATE_ACQ) {
928                 __xfrm_state_insert(x);
929                 x = NULL;
930         }
931         err = 0;
932
933 out:
934         spin_unlock_bh(&xfrm_state_lock);
935
936         if (err)
937                 return err;
938
939         if (!x) {
940                 xfrm_state_delete(x1);
941                 xfrm_state_put(x1);
942                 return 0;
943         }
944
945         err = -EINVAL;
946         spin_lock_bh(&x1->lock);
947         if (likely(x1->km.state == XFRM_STATE_VALID)) {
948                 if (x->encap && x1->encap)
949                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
950                 if (x->coaddr && x1->coaddr) {
951                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
952                 }
953                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
954                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
955                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
956                 x1->km.dying = 0;
957
958                 if (!mod_timer(&x1->timer, jiffies + HZ))
959                         xfrm_state_hold(x1);
960                 if (x1->curlft.use_time)
961                         xfrm_state_check_expire(x1);
962
963                 err = 0;
964         }
965         spin_unlock_bh(&x1->lock);
966
967         xfrm_state_put(x1);
968
969         return err;
970 }
971 EXPORT_SYMBOL(xfrm_state_update);
972
973 int xfrm_state_check_expire(struct xfrm_state *x)
974 {
975         if (!x->curlft.use_time)
976                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
977
978         if (x->km.state != XFRM_STATE_VALID)
979                 return -EINVAL;
980
981         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
982             x->curlft.packets >= x->lft.hard_packet_limit) {
983                 x->km.state = XFRM_STATE_EXPIRED;
984                 if (!mod_timer(&x->timer, jiffies))
985                         xfrm_state_hold(x);
986                 return -EINVAL;
987         }
988
989         if (!x->km.dying &&
990             (x->curlft.bytes >= x->lft.soft_byte_limit ||
991              x->curlft.packets >= x->lft.soft_packet_limit)) {
992                 x->km.dying = 1;
993                 km_state_expired(x, 0, 0);
994         }
995         return 0;
996 }
997 EXPORT_SYMBOL(xfrm_state_check_expire);
998
999 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
1000 {
1001         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
1002                 - skb_headroom(skb);
1003
1004         if (nhead > 0)
1005                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
1006
1007         /* Check tail too... */
1008         return 0;
1009 }
1010
1011 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
1012 {
1013         int err = xfrm_state_check_expire(x);
1014         if (err < 0)
1015                 goto err;
1016         err = xfrm_state_check_space(x, skb);
1017 err:
1018         return err;
1019 }
1020 EXPORT_SYMBOL(xfrm_state_check);
1021
1022 struct xfrm_state *
1023 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
1024                   unsigned short family)
1025 {
1026         struct xfrm_state *x;
1027
1028         spin_lock_bh(&xfrm_state_lock);
1029         x = __xfrm_state_lookup(daddr, spi, proto, family);
1030         spin_unlock_bh(&xfrm_state_lock);
1031         return x;
1032 }
1033 EXPORT_SYMBOL(xfrm_state_lookup);
1034
1035 struct xfrm_state *
1036 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1037                          u8 proto, unsigned short family)
1038 {
1039         struct xfrm_state *x;
1040
1041         spin_lock_bh(&xfrm_state_lock);
1042         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1043         spin_unlock_bh(&xfrm_state_lock);
1044         return x;
1045 }
1046 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1047
1048 struct xfrm_state *
1049 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
1050               xfrm_address_t *daddr, xfrm_address_t *saddr, 
1051               int create, unsigned short family)
1052 {
1053         struct xfrm_state *x;
1054
1055         spin_lock_bh(&xfrm_state_lock);
1056         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1057         spin_unlock_bh(&xfrm_state_lock);
1058
1059         return x;
1060 }
1061 EXPORT_SYMBOL(xfrm_find_acq);
1062
1063 #ifdef CONFIG_XFRM_SUB_POLICY
1064 int
1065 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1066                unsigned short family)
1067 {
1068         int err = 0;
1069         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1070         if (!afinfo)
1071                 return -EAFNOSUPPORT;
1072
1073         spin_lock_bh(&xfrm_state_lock);
1074         if (afinfo->tmpl_sort)
1075                 err = afinfo->tmpl_sort(dst, src, n);
1076         spin_unlock_bh(&xfrm_state_lock);
1077         xfrm_state_put_afinfo(afinfo);
1078         return err;
1079 }
1080 EXPORT_SYMBOL(xfrm_tmpl_sort);
1081
1082 int
1083 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1084                 unsigned short family)
1085 {
1086         int err = 0;
1087         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1088         if (!afinfo)
1089                 return -EAFNOSUPPORT;
1090
1091         spin_lock_bh(&xfrm_state_lock);
1092         if (afinfo->state_sort)
1093                 err = afinfo->state_sort(dst, src, n);
1094         spin_unlock_bh(&xfrm_state_lock);
1095         xfrm_state_put_afinfo(afinfo);
1096         return err;
1097 }
1098 EXPORT_SYMBOL(xfrm_state_sort);
1099 #endif
1100
1101 /* Silly enough, but I'm too lazy to build a resolution list. */
1102
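/* Linear scan of every bydst bucket for a larval (ACQ) state whose
 * km.seq matches the given acquire sequence number.
 */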
1103 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1104 {
1105         int i;
1106
1107         for (i = 0; i <= xfrm_state_hmask; i++) {
1108                 struct hlist_node *entry;
1109                 struct xfrm_state *x;
1110
1111                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1112                         if (x->km.seq == seq &&
1113                             x->km.state == XFRM_STATE_ACQ) {
1114                                 xfrm_state_hold(x);
1115                                 return x;
1116                         }
1117                 }
1118         }
1119         return NULL;
1120 }
1121
1122 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1123 {
1124         struct xfrm_state *x;
1125
1126         spin_lock_bh(&xfrm_state_lock);
1127         x = __xfrm_find_acq_byseq(seq);
1128         spin_unlock_bh(&xfrm_state_lock);
1129         return x;
1130 }
1131 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1132
1133 u32 xfrm_get_acqseq(void)
1134 {
1135         u32 res;
1136         static u32 acqseq;
1137         static DEFINE_SPINLOCK(acqseq_lock);
1138
1139         spin_lock_bh(&acqseq_lock);
1140         res = (++acqseq ? : ++acqseq);
1141         spin_unlock_bh(&acqseq_lock);
1142         return res;
1143 }
1144 EXPORT_SYMBOL(xfrm_get_acqseq);
1145
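/* Assign an SPI to a state that does not yet have one: take minspi
 * directly when the range is a single value, otherwise probe random
 * values in [minspi, maxspi] until an unused one is found.  On success
 * the state is linked into the byspi hash and km_waitq is woken.
 */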
1146 void
1147 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
1148 {
1149         unsigned int h;
1150         struct xfrm_state *x0;
1151
1152         if (x->id.spi)
1153                 return;
1154
1155         if (minspi == maxspi) {
1156                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1157                 if (x0) {
1158                         xfrm_state_put(x0);
1159                         return;
1160                 }
1161                 x->id.spi = minspi;
1162         } else {
1163                 u32 spi = 0;
1164                 minspi = ntohl(minspi);
1165                 maxspi = ntohl(maxspi);
1166                 for (h=0; h<maxspi-minspi+1; h++) {
1167                         spi = minspi + net_random()%(maxspi-minspi+1);
1168                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1169                         if (x0 == NULL) {
1170                                 x->id.spi = htonl(spi);
1171                                 break;
1172                         }
1173                         xfrm_state_put(x0);
1174                 }
1175         }
1176         if (x->id.spi) {
1177                 spin_lock_bh(&xfrm_state_lock);
1178                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1179                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1180                 xfrm_state_hold(x);
1181                 spin_unlock_bh(&xfrm_state_lock);
1182                 wake_up(&km_waitq);
1183         }
1184 }
1185 EXPORT_SYMBOL(xfrm_alloc_spi);
1186
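/* Walk all states matching 'proto' under xfrm_state_lock: the matches
 * are counted first, then 'func' is called for each with a decreasing
 * index (the last invocation sees 0).  Returns -ENOENT if nothing
 * matched, or the first non-zero value returned by 'func'.
 */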
1187 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1188                     void *data)
1189 {
1190         int i;
1191         struct xfrm_state *x;
1192         struct hlist_node *entry;
1193         int count = 0;
1194         int err = 0;
1195
1196         spin_lock_bh(&xfrm_state_lock);
1197         for (i = 0; i <= xfrm_state_hmask; i++) {
1198                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1199                         if (xfrm_id_proto_match(x->id.proto, proto))
1200                                 count++;
1201                 }
1202         }
1203         if (count == 0) {
1204                 err = -ENOENT;
1205                 goto out;
1206         }
1207
1208         for (i = 0; i <= xfrm_state_hmask; i++) {
1209                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1210                         if (!xfrm_id_proto_match(x->id.proto, proto))
1211                                 continue;
1212                         err = func(x, --count, data);
1213                         if (err)
1214                                 goto out;
1215                 }
1216         }
1217 out:
1218         spin_unlock_bh(&xfrm_state_lock);
1219         return err;
1220 }
1221 EXPORT_SYMBOL(xfrm_state_walk);
1222
1223
1224 void xfrm_replay_notify(struct xfrm_state *x, int event)
1225 {
1226         struct km_event c;
1227         /* we send notify messages in case
1228          *  1. we updated one of the sequence numbers, and the seqno difference
1229          *     is at least x->replay_maxdiff, in this case we also update the
1230          *     timeout of our timer function
1231          *  2. if x->replay_maxage has elapsed since last update,
1232          *     and there were changes
1233          *
1234          *  The state structure must be locked!
1235          */
1236
1237         switch (event) {
1238         case XFRM_REPLAY_UPDATE:
1239                 if (x->replay_maxdiff &&
1240                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1241                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1242                         if (x->xflags & XFRM_TIME_DEFER)
1243                                 event = XFRM_REPLAY_TIMEOUT;
1244                         else
1245                                 return;
1246                 }
1247
1248                 break;
1249
1250         case XFRM_REPLAY_TIMEOUT:
1251                 if ((x->replay.seq == x->preplay.seq) &&
1252                     (x->replay.bitmap == x->preplay.bitmap) &&
1253                     (x->replay.oseq == x->preplay.oseq)) {
1254                         x->xflags |= XFRM_TIME_DEFER;
1255                         return;
1256                 }
1257
1258                 break;
1259         }
1260
1261         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1262         c.event = XFRM_MSG_NEWAE;
1263         c.data.aevent = event;
1264         km_state_notify(x, &c);
1265
1266         if (x->replay_maxage &&
1267             !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
1268                 xfrm_state_hold(x);
1269                 x->xflags &= ~XFRM_TIME_DEFER;
1270         }
1271 }
1272 EXPORT_SYMBOL(xfrm_replay_notify);
1273
1274 static void xfrm_replay_timer_handler(unsigned long data)
1275 {
1276         struct xfrm_state *x = (struct xfrm_state*)data;
1277
1278         spin_lock(&x->lock);
1279
1280         if (x->km.state == XFRM_STATE_VALID) {
1281                 if (xfrm_aevent_is_on())
1282                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1283                 else
1284                         x->xflags |= XFRM_TIME_DEFER;
1285         }
1286
1287         spin_unlock(&x->lock);
1288         xfrm_state_put(x);
1289 }
1290
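/* Sliding-window anti-replay check for an inbound sequence number:
 * numbers ahead of replay.seq are accepted, numbers more than
 * props.replay_window behind it or already marked in the bitmap are
 * rejected and accounted in x->stats.
 */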
1291 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
1292 {
1293         u32 diff;
1294
1295         seq = ntohl(seq);
1296
1297         if (unlikely(seq == 0))
1298                 return -EINVAL;
1299
1300         if (likely(seq > x->replay.seq))
1301                 return 0;
1302
1303         diff = x->replay.seq - seq;
1304         if (diff >= x->props.replay_window) {
1305                 x->stats.replay_window++;
1306                 return -EINVAL;
1307         }
1308
1309         if (x->replay.bitmap & (1U << diff)) {
1310                 x->stats.replay++;
1311                 return -EINVAL;
1312         }
1313         return 0;
1314 }
1315 EXPORT_SYMBOL(xfrm_replay_check);
1316
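/* Advance the anti-replay window for an accepted sequence number
 * (or mark an older, in-window number as seen) and, when async events
 * are enabled, notify the key managers of the update.
 */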
1317 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
1318 {
1319         u32 diff;
1320
1321         seq = ntohl(seq);
1322
1323         if (seq > x->replay.seq) {
1324                 diff = seq - x->replay.seq;
1325                 if (diff < x->props.replay_window)
1326                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1327                 else
1328                         x->replay.bitmap = 1;
1329                 x->replay.seq = seq;
1330         } else {
1331                 diff = x->replay.seq - seq;
1332                 x->replay.bitmap |= (1U << diff);
1333         }
1334
1335         if (xfrm_aevent_is_on())
1336                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1337 }
1338 EXPORT_SYMBOL(xfrm_replay_advance);
1339
1340 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1341 static DEFINE_RWLOCK(xfrm_km_lock);
1342
1343 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1344 {
1345         struct xfrm_mgr *km;
1346
1347         read_lock(&xfrm_km_lock);
1348         list_for_each_entry(km, &xfrm_km_list, list)
1349                 if (km->notify_policy)
1350                         km->notify_policy(xp, dir, c);
1351         read_unlock(&xfrm_km_lock);
1352 }
1353
1354 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1355 {
1356         struct xfrm_mgr *km;
1357         read_lock(&xfrm_km_lock);
1358         list_for_each_entry(km, &xfrm_km_list, list)
1359                 if (km->notify)
1360                         km->notify(x, c);
1361         read_unlock(&xfrm_km_lock);
1362 }
1363
1364 EXPORT_SYMBOL(km_policy_notify);
1365 EXPORT_SYMBOL(km_state_notify);
1366
1367 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1368 {
1369         struct km_event c;
1370
1371         c.data.hard = hard;
1372         c.pid = pid;
1373         c.event = XFRM_MSG_EXPIRE;
1374         km_state_notify(x, &c);
1375
1376         if (hard)
1377                 wake_up(&km_waitq);
1378 }
1379
1380 EXPORT_SYMBOL(km_state_expired);
1381 /*
1382  * We send to all registered managers regardless of failure;
1383  * we are happy with one success.
1384  */
1385 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1386 {
1387         int err = -EINVAL, acqret;
1388         struct xfrm_mgr *km;
1389
1390         read_lock(&xfrm_km_lock);
1391         list_for_each_entry(km, &xfrm_km_list, list) {
1392                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1393                 if (!acqret)
1394                         err = acqret;
1395         }
1396         read_unlock(&xfrm_km_lock);
1397         return err;
1398 }
1399 EXPORT_SYMBOL(km_query);
1400
1401 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1402 {
1403         int err = -EINVAL;
1404         struct xfrm_mgr *km;
1405
1406         read_lock(&xfrm_km_lock);
1407         list_for_each_entry(km, &xfrm_km_list, list) {
1408                 if (km->new_mapping)
1409                         err = km->new_mapping(x, ipaddr, sport);
1410                 if (!err)
1411                         break;
1412         }
1413         read_unlock(&xfrm_km_lock);
1414         return err;
1415 }
1416 EXPORT_SYMBOL(km_new_mapping);
1417
1418 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1419 {
1420         struct km_event c;
1421
1422         c.data.hard = hard;
1423         c.pid = pid;
1424         c.event = XFRM_MSG_POLEXPIRE;
1425         km_policy_notify(pol, dir, &c);
1426
1427         if (hard)
1428                 wake_up(&km_waitq);
1429 }
1430 EXPORT_SYMBOL(km_policy_expired);
1431
1432 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1433 {
1434         int err = -EINVAL;
1435         int ret;
1436         struct xfrm_mgr *km;
1437
1438         read_lock(&xfrm_km_lock);
1439         list_for_each_entry(km, &xfrm_km_list, list) {
1440                 if (km->report) {
1441                         ret = km->report(proto, sel, addr);
1442                         if (!ret)
1443                                 err = ret;
1444                 }
1445         }
1446         read_unlock(&xfrm_km_lock);
1447         return err;
1448 }
1449 EXPORT_SYMBOL(km_report);
1450
1451 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1452 {
1453         int err;
1454         u8 *data;
1455         struct xfrm_mgr *km;
1456         struct xfrm_policy *pol = NULL;
1457
1458         if (optlen <= 0 || optlen > PAGE_SIZE)
1459                 return -EMSGSIZE;
1460
1461         data = kmalloc(optlen, GFP_KERNEL);
1462         if (!data)
1463                 return -ENOMEM;
1464
1465         err = -EFAULT;
1466         if (copy_from_user(data, optval, optlen))
1467                 goto out;
1468
1469         err = -EINVAL;
1470         read_lock(&xfrm_km_lock);
1471         list_for_each_entry(km, &xfrm_km_list, list) {
1472                 pol = km->compile_policy(sk, optname, data,
1473                                          optlen, &err);
1474                 if (err >= 0)
1475                         break;
1476         }
1477         read_unlock(&xfrm_km_lock);
1478
1479         if (err >= 0) {
1480                 xfrm_sk_policy_insert(sk, err, pol);
1481                 xfrm_pol_put(pol);
1482                 err = 0;
1483         }
1484
1485 out:
1486         kfree(data);
1487         return err;
1488 }
1489 EXPORT_SYMBOL(xfrm_user_policy);
1490
1491 int xfrm_register_km(struct xfrm_mgr *km)
1492 {
1493         write_lock_bh(&xfrm_km_lock);
1494         list_add_tail(&km->list, &xfrm_km_list);
1495         write_unlock_bh(&xfrm_km_lock);
1496         return 0;
1497 }
1498 EXPORT_SYMBOL(xfrm_register_km);
1499
1500 int xfrm_unregister_km(struct xfrm_mgr *km)
1501 {
1502         write_lock_bh(&xfrm_km_lock);
1503         list_del(&km->list);
1504         write_unlock_bh(&xfrm_km_lock);
1505         return 0;
1506 }
1507 EXPORT_SYMBOL(xfrm_unregister_km);
1508
1509 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1510 {
1511         int err = 0;
1512         if (unlikely(afinfo == NULL))
1513                 return -EINVAL;
1514         if (unlikely(afinfo->family >= NPROTO))
1515                 return -EAFNOSUPPORT;
1516         write_lock_bh(&xfrm_state_afinfo_lock);
1517         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1518                 err = -ENOBUFS;
1519         else
1520                 xfrm_state_afinfo[afinfo->family] = afinfo;
1521         write_unlock_bh(&xfrm_state_afinfo_lock);
1522         return err;
1523 }
1524 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1525
1526 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1527 {
1528         int err = 0;
1529         if (unlikely(afinfo == NULL))
1530                 return -EINVAL;
1531         if (unlikely(afinfo->family >= NPROTO))
1532                 return -EAFNOSUPPORT;
1533         write_lock_bh(&xfrm_state_afinfo_lock);
1534         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1535                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1536                         err = -EINVAL;
1537                 else
1538                         xfrm_state_afinfo[afinfo->family] = NULL;
1539         }
1540         write_unlock_bh(&xfrm_state_afinfo_lock);
1541         return err;
1542 }
1543 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1544
1545 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1546 {
1547         struct xfrm_state_afinfo *afinfo;
1548         if (unlikely(family >= NPROTO))
1549                 return NULL;
1550         read_lock(&xfrm_state_afinfo_lock);
1551         afinfo = xfrm_state_afinfo[family];
1552         if (unlikely(!afinfo))
1553                 read_unlock(&xfrm_state_afinfo_lock);
1554         return afinfo;
1555 }
1556
1557 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1558 {
1559         read_unlock(&xfrm_state_afinfo_lock);
1560 }
1561
1562 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1563 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1564 {
1565         if (x->tunnel) {
1566                 struct xfrm_state *t = x->tunnel;
1567
1568                 if (atomic_read(&t->tunnel_users) == 2)
1569                         xfrm_state_delete(t);
1570                 atomic_dec(&t->tunnel_users);
1571                 xfrm_state_put(t);
1572                 x->tunnel = NULL;
1573         }
1574 }
1575 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1576
1577 /*
1578  * This function is NOT optimal.  For example, with ESP it will give an
1579  * MTU that's usually two bytes short of being optimal.  However, it will
1580  * usually give an answer that's a multiple of 4 provided the input is
1581  * also a multiple of 4.
1582  */
1583 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1584 {
1585         int res = mtu;
1586
1587         res -= x->props.header_len;
1588
1589         for (;;) {
1590                 int m = res;
1591
1592                 if (m < 68)
1593                         return 68;
1594
1595                 spin_lock_bh(&x->lock);
1596                 if (x->km.state == XFRM_STATE_VALID &&
1597                     x->type && x->type->get_max_size)
1598                         m = x->type->get_max_size(x, m);
1599                 else
1600                         m += x->props.header_len;
1601                 spin_unlock_bh(&x->lock);
1602
1603                 if (m <= mtu)
1604                         break;
1605                 res -= (m - mtu);
1606         }
1607
1608         return res;
1609 }
1610
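/* Bind a new state to its address-family helpers, protocol type and
 * mode, run their init hooks and mark the state XFRM_STATE_VALID on
 * success.
 */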
1611 int xfrm_init_state(struct xfrm_state *x)
1612 {
1613         struct xfrm_state_afinfo *afinfo;
1614         int family = x->props.family;
1615         int err;
1616
1617         err = -EAFNOSUPPORT;
1618         afinfo = xfrm_state_get_afinfo(family);
1619         if (!afinfo)
1620                 goto error;
1621
1622         err = 0;
1623         if (afinfo->init_flags)
1624                 err = afinfo->init_flags(x);
1625
1626         xfrm_state_put_afinfo(afinfo);
1627
1628         if (err)
1629                 goto error;
1630
1631         err = -EPROTONOSUPPORT;
1632         x->type = xfrm_get_type(x->id.proto, family);
1633         if (x->type == NULL)
1634                 goto error;
1635
1636         err = x->type->init_state(x);
1637         if (err)
1638                 goto error;
1639
1640         x->mode = xfrm_get_mode(x->props.mode, family);
1641         if (x->mode == NULL)
1642                 goto error;
1643
1644         x->km.state = XFRM_STATE_VALID;
1645
1646 error:
1647         return err;
1648 }
1649
1650 EXPORT_SYMBOL(xfrm_init_state);
1651  
1652 void __init xfrm_state_init(void)
1653 {
1654         unsigned int sz;
1655
1656         sz = sizeof(struct hlist_head) * 8;
1657
1658         xfrm_state_bydst = xfrm_state_hash_alloc(sz);
1659         xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
1660         xfrm_state_byspi = xfrm_state_hash_alloc(sz);
1661         if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1662                 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1663         xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1664
1665         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1666 }
1667