cpufreq: intel_pstate: Add io_boost trace
[cascardo/linux.git] / net / netfilter / nf_conntrack_standalone.c
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License version 2 as
7  * published by the Free Software Foundation.
8  */
9
10 #include <linux/types.h>
11 #include <linux/netfilter.h>
12 #include <linux/slab.h>
13 #include <linux/module.h>
14 #include <linux/skbuff.h>
15 #include <linux/proc_fs.h>
16 #include <linux/seq_file.h>
17 #include <linux/percpu.h>
18 #include <linux/netdevice.h>
19 #include <linux/security.h>
20 #include <net/net_namespace.h>
21 #ifdef CONFIG_SYSCTL
22 #include <linux/sysctl.h>
23 #endif
24
25 #include <net/netfilter/nf_conntrack.h>
26 #include <net/netfilter/nf_conntrack_core.h>
27 #include <net/netfilter/nf_conntrack_l3proto.h>
28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 #include <net/netfilter/nf_conntrack_expect.h>
30 #include <net/netfilter/nf_conntrack_helper.h>
31 #include <net/netfilter/nf_conntrack_acct.h>
32 #include <net/netfilter/nf_conntrack_zones.h>
33 #include <net/netfilter/nf_conntrack_timestamp.h>
34 #include <linux/rculist_nulls.h>
35
36 MODULE_LICENSE("GPL");
37
38 #ifdef CONFIG_NF_CONNTRACK_PROCFS
39 void
40 print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
41             const struct nf_conntrack_l3proto *l3proto,
42             const struct nf_conntrack_l4proto *l4proto)
43 {
44         l3proto->print_tuple(s, tuple);
45         l4proto->print_tuple(s, tuple);
46 }
47 EXPORT_SYMBOL_GPL(print_tuple);
48
49 struct ct_iter_state {
50         struct seq_net_private p;
51         struct hlist_nulls_head *hash;
52         unsigned int htable_size;
53         unsigned int bucket;
54         u_int64_t time_now;
55 };
56
57 static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
58 {
59         struct ct_iter_state *st = seq->private;
60         struct hlist_nulls_node *n;
61
62         for (st->bucket = 0;
63              st->bucket < st->htable_size;
64              st->bucket++) {
65                 n = rcu_dereference(
66                         hlist_nulls_first_rcu(&st->hash[st->bucket]));
67                 if (!is_a_nulls(n))
68                         return n;
69         }
70         return NULL;
71 }
72
73 static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
74                                       struct hlist_nulls_node *head)
75 {
76         struct ct_iter_state *st = seq->private;
77
78         head = rcu_dereference(hlist_nulls_next_rcu(head));
79         while (is_a_nulls(head)) {
80                 if (likely(get_nulls_value(head) == st->bucket)) {
81                         if (++st->bucket >= st->htable_size)
82                                 return NULL;
83                 }
84                 head = rcu_dereference(
85                         hlist_nulls_first_rcu(&st->hash[st->bucket]));
86         }
87         return head;
88 }
89
90 static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
91 {
92         struct hlist_nulls_node *head = ct_get_first(seq);
93
94         if (head)
95                 while (pos && (head = ct_get_next(seq, head)))
96                         pos--;
97         return pos ? NULL : head;
98 }
99
100 static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
101         __acquires(RCU)
102 {
103         struct ct_iter_state *st = seq->private;
104
105         st->time_now = ktime_get_real_ns();
106         rcu_read_lock();
107
108         nf_conntrack_get_ht(&st->hash, &st->htable_size);
109         return ct_get_idx(seq, *pos);
110 }
111
112 static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
113 {
114         (*pos)++;
115         return ct_get_next(s, v);
116 }
117
118 static void ct_seq_stop(struct seq_file *s, void *v)
119         __releases(RCU)
120 {
121         rcu_read_unlock();
122 }
123
#ifdef CONFIG_NF_CONNTRACK_SECMARK
/*
 * Print "secctx=<context> " for the conntrack's secmark.  Nothing is
 * printed when the secid cannot be converted to a context string.
 */
static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
{
	char *secctx;
	u32 len;

	if (security_secid_to_secctx(ct->secmark, &secctx, &len))
		return;

	seq_printf(s, "secctx=%s ", secctx);

	/* Hand the context string back to the security layer. */
	security_release_secctx(secctx, len);
}
#else
/* Secmark support is compiled out: nothing to print. */
static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
{
}
#endif
144
#ifdef CONFIG_NF_CONNTRACK_ZONES
/*
 * Print the conntrack zone id, but only when the zone's direction equals
 * @dir.  The key name depends on the direction: "zone=", "zone-orig=" or
 * "zone-reply=".  Any other direction value prints nothing.
 */
static void ct_show_zone(struct seq_file *s, const struct nf_conn *ct,
			 int dir)
{
	const struct nf_conntrack_zone *zone = nf_ct_zone(ct);

	if (zone->dir != dir)
		return;

	if (zone->dir == NF_CT_DEFAULT_ZONE_DIR)
		seq_printf(s, "zone=%u ", zone->id);
	else if (zone->dir == NF_CT_ZONE_DIR_ORIG)
		seq_printf(s, "zone-orig=%u ", zone->id);
	else if (zone->dir == NF_CT_ZONE_DIR_REPL)
		seq_printf(s, "zone-reply=%u ", zone->id);
}
#else
/* Zone support is compiled out: nothing to print. */
static inline void ct_show_zone(struct seq_file *s, const struct nf_conn *ct,
				int dir)
{
}
#endif
173
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
/*
 * Print "delta-time=<seconds> ": the time between the conntrack's start
 * timestamp and the time sampled in ct_seq_start().  Non-positive
 * differences are reported as 0.  Nothing is printed when the conntrack has
 * no timestamp extension.
 */
static void ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
{
	struct ct_iter_state *st = s->private;
	struct nf_conn_tstamp *tstamp = nf_conn_tstamp_find(ct);
	s64 delta_time;

	if (!tstamp)
		return;

	delta_time = st->time_now - tstamp->start;
	if (delta_time > 0)
		delta_time = div_s64(delta_time, NSEC_PER_SEC);
	else
		delta_time = 0;

	seq_printf(s, "delta-time=%llu ", (unsigned long long)delta_time);
}
#else
/* Timestamp support is compiled out: nothing to print. */
static inline void
ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
{
}
#endif
200
201 /* return 0 on success, 1 in case of error */
202 static int ct_seq_show(struct seq_file *s, void *v)
203 {
204         struct nf_conntrack_tuple_hash *hash = v;
205         struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
206         const struct nf_conntrack_l3proto *l3proto;
207         const struct nf_conntrack_l4proto *l4proto;
208         int ret = 0;
209
210         NF_CT_ASSERT(ct);
211         if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
212                 return 0;
213
214         /* we only want to print DIR_ORIGINAL */
215         if (NF_CT_DIRECTION(hash))
216                 goto release;
217
218         l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
219         NF_CT_ASSERT(l3proto);
220         l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
221         NF_CT_ASSERT(l4proto);
222
223         ret = -ENOSPC;
224         seq_printf(s, "%-8s %u %-8s %u %ld ",
225                    l3proto->name, nf_ct_l3num(ct),
226                    l4proto->name, nf_ct_protonum(ct),
227                    timer_pending(&ct->timeout)
228                    ? (long)(ct->timeout.expires - jiffies)/HZ : 0);
229
230         if (l4proto->print_conntrack)
231                 l4proto->print_conntrack(s, ct);
232
233         print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
234                     l3proto, l4proto);
235
236         ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG);
237
238         if (seq_has_overflowed(s))
239                 goto release;
240
241         if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
242                 goto release;
243
244         if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
245                 seq_printf(s, "[UNREPLIED] ");
246
247         print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
248                     l3proto, l4proto);
249
250         ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL);
251
252         if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
253                 goto release;
254
255         if (test_bit(IPS_ASSURED_BIT, &ct->status))
256                 seq_printf(s, "[ASSURED] ");
257
258         if (seq_has_overflowed(s))
259                 goto release;
260
261 #if defined(CONFIG_NF_CONNTRACK_MARK)
262         seq_printf(s, "mark=%u ", ct->mark);
263 #endif
264
265         ct_show_secctx(s, ct);
266         ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR);
267         ct_show_delta_time(s, ct);
268
269         seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
270
271         if (seq_has_overflowed(s))
272                 goto release;
273
274         ret = 0;
275 release:
276         nf_ct_put(ct);
277         return ret;
278 }
279
280 static const struct seq_operations ct_seq_ops = {
281         .start = ct_seq_start,
282         .next  = ct_seq_next,
283         .stop  = ct_seq_stop,
284         .show  = ct_seq_show
285 };
286
287 static int ct_open(struct inode *inode, struct file *file)
288 {
289         return seq_open_net(inode, file, &ct_seq_ops,
290                         sizeof(struct ct_iter_state));
291 }
292
293 static const struct file_operations ct_file_ops = {
294         .owner   = THIS_MODULE,
295         .open    = ct_open,
296         .read    = seq_read,
297         .llseek  = seq_lseek,
298         .release = seq_release_net,
299 };
300
301 static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
302 {
303         struct net *net = seq_file_net(seq);
304         int cpu;
305
306         if (*pos == 0)
307                 return SEQ_START_TOKEN;
308
309         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
310                 if (!cpu_possible(cpu))
311                         continue;
312                 *pos = cpu + 1;
313                 return per_cpu_ptr(net->ct.stat, cpu);
314         }
315
316         return NULL;
317 }
318
319 static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
320 {
321         struct net *net = seq_file_net(seq);
322         int cpu;
323
324         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
325                 if (!cpu_possible(cpu))
326                         continue;
327                 *pos = cpu + 1;
328                 return per_cpu_ptr(net->ct.stat, cpu);
329         }
330
331         return NULL;
332 }
333
/* seq_file .stop: ct_cpu_seq_start() acquires nothing, so nothing to undo. */
static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
337
338 static int ct_cpu_seq_show(struct seq_file *seq, void *v)
339 {
340         struct net *net = seq_file_net(seq);
341         unsigned int nr_conntracks = atomic_read(&net->ct.count);
342         const struct ip_conntrack_stat *st = v;
343
344         if (v == SEQ_START_TOKEN) {
345                 seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete search_restart\n");
346                 return 0;
347         }
348
349         seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
350                         "%08x %08x %08x %08x %08x  %08x %08x %08x %08x\n",
351                    nr_conntracks,
352                    st->searched,
353                    st->found,
354                    st->new,
355                    st->invalid,
356                    st->ignore,
357                    st->delete,
358                    st->delete_list,
359                    st->insert,
360                    st->insert_failed,
361                    st->drop,
362                    st->early_drop,
363                    st->error,
364
365                    st->expect_new,
366                    st->expect_create,
367                    st->expect_delete,
368                    st->search_restart
369                 );
370         return 0;
371 }
372
373 static const struct seq_operations ct_cpu_seq_ops = {
374         .start  = ct_cpu_seq_start,
375         .next   = ct_cpu_seq_next,
376         .stop   = ct_cpu_seq_stop,
377         .show   = ct_cpu_seq_show,
378 };
379
380 static int ct_cpu_seq_open(struct inode *inode, struct file *file)
381 {
382         return seq_open_net(inode, file, &ct_cpu_seq_ops,
383                             sizeof(struct seq_net_private));
384 }
385
386 static const struct file_operations ct_cpu_seq_fops = {
387         .owner   = THIS_MODULE,
388         .open    = ct_cpu_seq_open,
389         .read    = seq_read,
390         .llseek  = seq_lseek,
391         .release = seq_release_net,
392 };
393
394 static int nf_conntrack_standalone_init_proc(struct net *net)
395 {
396         struct proc_dir_entry *pde;
397         kuid_t root_uid;
398         kgid_t root_gid;
399
400         pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops);
401         if (!pde)
402                 goto out_nf_conntrack;
403
404         root_uid = make_kuid(net->user_ns, 0);
405         root_gid = make_kgid(net->user_ns, 0);
406         if (uid_valid(root_uid) && gid_valid(root_gid))
407                 proc_set_user(pde, root_uid, root_gid);
408
409         pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat,
410                           &ct_cpu_seq_fops);
411         if (!pde)
412                 goto out_stat_nf_conntrack;
413         return 0;
414
415 out_stat_nf_conntrack:
416         remove_proc_entry("nf_conntrack", net->proc_net);
417 out_nf_conntrack:
418         return -ENOMEM;
419 }
420
421 static void nf_conntrack_standalone_fini_proc(struct net *net)
422 {
423         remove_proc_entry("nf_conntrack", net->proc_net_stat);
424         remove_proc_entry("nf_conntrack", net->proc_net);
425 }
426 #else
/* procfs support is compiled out: nothing to create, always succeeds. */
static int nf_conntrack_standalone_init_proc(struct net *net)
{
	return 0;
}
431
/* procfs support is compiled out: nothing to remove. */
static void nf_conntrack_standalone_fini_proc(struct net *net)
{
}
435 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
436
437 /* Sysctl support */
438
439 #ifdef CONFIG_SYSCTL
440 /* Log invalid packets of a given protocol */
441 static int log_invalid_proto_min __read_mostly;
442 static int log_invalid_proto_max __read_mostly = 255;
443
444 /* size the user *wants to set */
445 static unsigned int nf_conntrack_htable_size_user __read_mostly;
446
447 static int
448 nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
449                          void __user *buffer, size_t *lenp, loff_t *ppos)
450 {
451         int ret;
452
453         ret = proc_dointvec(table, write, buffer, lenp, ppos);
454         if (ret < 0 || !write)
455                 return ret;
456
457         /* update ret, we might not be able to satisfy request */
458         ret = nf_conntrack_hash_resize(nf_conntrack_htable_size_user);
459
460         /* update it to the actual value used by conntrack */
461         nf_conntrack_htable_size_user = nf_conntrack_htable_size;
462         return ret;
463 }
464
/* Handle for the "net" sysctl table registered at module init */
static struct ctl_table_header *nf_ct_netfilter_header;
466
467 static struct ctl_table nf_ct_sysctl_table[] = {
468         {
469                 .procname       = "nf_conntrack_max",
470                 .data           = &nf_conntrack_max,
471                 .maxlen         = sizeof(int),
472                 .mode           = 0644,
473                 .proc_handler   = proc_dointvec,
474         },
475         {
476                 .procname       = "nf_conntrack_count",
477                 .data           = &init_net.ct.count,
478                 .maxlen         = sizeof(int),
479                 .mode           = 0444,
480                 .proc_handler   = proc_dointvec,
481         },
482         {
483                 .procname       = "nf_conntrack_buckets",
484                 .data           = &nf_conntrack_htable_size_user,
485                 .maxlen         = sizeof(unsigned int),
486                 .mode           = 0644,
487                 .proc_handler   = nf_conntrack_hash_sysctl,
488         },
489         {
490                 .procname       = "nf_conntrack_checksum",
491                 .data           = &init_net.ct.sysctl_checksum,
492                 .maxlen         = sizeof(unsigned int),
493                 .mode           = 0644,
494                 .proc_handler   = proc_dointvec,
495         },
496         {
497                 .procname       = "nf_conntrack_log_invalid",
498                 .data           = &init_net.ct.sysctl_log_invalid,
499                 .maxlen         = sizeof(unsigned int),
500                 .mode           = 0644,
501                 .proc_handler   = proc_dointvec_minmax,
502                 .extra1         = &log_invalid_proto_min,
503                 .extra2         = &log_invalid_proto_max,
504         },
505         {
506                 .procname       = "nf_conntrack_expect_max",
507                 .data           = &nf_ct_expect_max,
508                 .maxlen         = sizeof(int),
509                 .mode           = 0644,
510                 .proc_handler   = proc_dointvec,
511         },
512         { }
513 };
514
515 static struct ctl_table nf_ct_netfilter_table[] = {
516         {
517                 .procname       = "nf_conntrack_max",
518                 .data           = &nf_conntrack_max,
519                 .maxlen         = sizeof(int),
520                 .mode           = 0644,
521                 .proc_handler   = proc_dointvec,
522         },
523         { }
524 };
525
526 static int nf_conntrack_standalone_init_sysctl(struct net *net)
527 {
528         struct ctl_table *table;
529
530         table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table),
531                         GFP_KERNEL);
532         if (!table)
533                 goto out_kmemdup;
534
535         table[1].data = &net->ct.count;
536         table[3].data = &net->ct.sysctl_checksum;
537         table[4].data = &net->ct.sysctl_log_invalid;
538
539         /* Don't export sysctls to unprivileged users */
540         if (net->user_ns != &init_user_ns)
541                 table[0].procname = NULL;
542
543         if (!net_eq(&init_net, net))
544                 table[2].mode = 0444;
545
546         net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table);
547         if (!net->ct.sysctl_header)
548                 goto out_unregister_netfilter;
549
550         return 0;
551
552 out_unregister_netfilter:
553         kfree(table);
554 out_kmemdup:
555         return -ENOMEM;
556 }
557
558 static void nf_conntrack_standalone_fini_sysctl(struct net *net)
559 {
560         struct ctl_table *table;
561
562         table = net->ct.sysctl_header->ctl_table_arg;
563         unregister_net_sysctl_table(net->ct.sysctl_header);
564         kfree(table);
565 }
566 #else
/* sysctl support is compiled out: nothing to register, always succeeds. */
static int nf_conntrack_standalone_init_sysctl(struct net *net)
{
	return 0;
}
571
/* sysctl support is compiled out: nothing to unregister. */
static void nf_conntrack_standalone_fini_sysctl(struct net *net)
{
}
575 #endif /* CONFIG_SYSCTL */
576
577 static int nf_conntrack_pernet_init(struct net *net)
578 {
579         int ret;
580
581         ret = nf_conntrack_init_net(net);
582         if (ret < 0)
583                 goto out_init;
584
585         ret = nf_conntrack_standalone_init_proc(net);
586         if (ret < 0)
587                 goto out_proc;
588
589         net->ct.sysctl_checksum = 1;
590         net->ct.sysctl_log_invalid = 0;
591         ret = nf_conntrack_standalone_init_sysctl(net);
592         if (ret < 0)
593                 goto out_sysctl;
594
595         return 0;
596
597 out_sysctl:
598         nf_conntrack_standalone_fini_proc(net);
599 out_proc:
600         nf_conntrack_cleanup_net(net);
601 out_init:
602         return ret;
603 }
604
605 static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
606 {
607         struct net *net;
608
609         list_for_each_entry(net, net_exit_list, exit_list) {
610                 nf_conntrack_standalone_fini_sysctl(net);
611                 nf_conntrack_standalone_fini_proc(net);
612         }
613         nf_conntrack_cleanup_net_list(net_exit_list);
614 }
615
616 static struct pernet_operations nf_conntrack_net_ops = {
617         .init           = nf_conntrack_pernet_init,
618         .exit_batch     = nf_conntrack_pernet_exit,
619 };
620
621 static int __init nf_conntrack_standalone_init(void)
622 {
623         int ret = nf_conntrack_init_start();
624         if (ret < 0)
625                 goto out_start;
626
627 #ifdef CONFIG_SYSCTL
628         nf_ct_netfilter_header =
629                 register_net_sysctl(&init_net, "net", nf_ct_netfilter_table);
630         if (!nf_ct_netfilter_header) {
631                 pr_err("nf_conntrack: can't register to sysctl.\n");
632                 ret = -ENOMEM;
633                 goto out_sysctl;
634         }
635
636         nf_conntrack_htable_size_user = nf_conntrack_htable_size;
637 #endif
638
639         ret = register_pernet_subsys(&nf_conntrack_net_ops);
640         if (ret < 0)
641                 goto out_pernet;
642
643         nf_conntrack_init_end();
644         return 0;
645
646 out_pernet:
647 #ifdef CONFIG_SYSCTL
648         unregister_net_sysctl_table(nf_ct_netfilter_header);
649 out_sysctl:
650 #endif
651         nf_conntrack_cleanup_end();
652 out_start:
653         return ret;
654 }
655
656 static void __exit nf_conntrack_standalone_fini(void)
657 {
658         nf_conntrack_cleanup_start();
659         unregister_pernet_subsys(&nf_conntrack_net_ops);
660 #ifdef CONFIG_SYSCTL
661         unregister_net_sysctl_table(nf_ct_netfilter_header);
662 #endif
663         nf_conntrack_cleanup_end();
664 }
665
666 module_init(nf_conntrack_standalone_init);
667 module_exit(nf_conntrack_standalone_fini);
668
/*
 * Deliberately empty.  Modules that need conntrack but do not otherwise
 * depend on any of its symbols should call this.
 */
void need_conntrack(void)
{
}
674 EXPORT_SYMBOL_GPL(need_conntrack);