tools/vm/page-types.c: page-cache sniffing feature
[cascardo/linux.git] / tools / vm / page-types.c
index f9be24d..05654f5 100644 (file)
@@ -19,7 +19,8 @@
  * Authors: Wu Fengguang <fengguang.wu@intel.com>
  */
 
-#define _LARGEFILE64_SOURCE
+#define _FILE_OFFSET_BITS 64
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <limits.h>
 #include <assert.h>
+#include <ftw.h>
+#include <time.h>
 #include <sys/types.h>
 #include <sys/errno.h>
 #include <sys/fcntl.h>
 #include <sys/mount.h>
 #include <sys/statfs.h>
+#include <sys/mman.h>
 #include "../../include/uapi/linux/magic.h"
 #include "../../include/uapi/linux/kernel-page-flags.h"
 #include <api/fs/debugfs.h>
@@ -158,6 +162,7 @@ static int          opt_raw;        /* for kernel developers */
 static int             opt_list;       /* list pages (in ranges) */
 static int             opt_no_summary; /* don't show summary */
 static pid_t           opt_pid;        /* process to walk */
+const char *           opt_file;       /* file or directory to walk */
 
 #define MAX_ADDR_RANGES        1024
 static int             nr_addr_ranges;
@@ -253,12 +258,7 @@ static unsigned long do_u64_read(int fd, char *name,
        if (index > ULONG_MAX / 8)
                fatal("index overflow: %lu\n", index);
 
-       if (lseek(fd, index * 8, SEEK_SET) < 0) {
-               perror(name);
-               exit(EXIT_FAILURE);
-       }
-
-       bytes = read(fd, buf, count * 8);
+       bytes = pread(fd, buf, count * 8, (off_t)index * 8);
        if (bytes < 0) {
                perror(name);
                exit(EXIT_FAILURE);
@@ -343,8 +343,8 @@ static char *page_flag_longname(uint64_t flags)
  * page list and summary
  */
 
-static void show_page_range(unsigned long voffset,
-                           unsigned long offset, uint64_t flags)
+static void show_page_range(unsigned long voffset, unsigned long offset,
+                           unsigned long size, uint64_t flags)
 {
        static uint64_t      flags0;
        static unsigned long voff;
@@ -352,14 +352,16 @@ static void show_page_range(unsigned long voffset,
        static unsigned long count;
 
        if (flags == flags0 && offset == index + count &&
-           (!opt_pid || voffset == voff + count)) {
-               count++;
+           size && voffset == voff + count) {
+               count += size;
                return;
        }
 
        if (count) {
                if (opt_pid)
                        printf("%lx\t", voff);
+               if (opt_file)
+                       printf("%lu\t", voff);
                printf("%lx\t%lx\t%s\n",
                                index, count, page_flag_name(flags0));
        }
@@ -367,7 +369,12 @@ static void show_page_range(unsigned long voffset,
        flags0 = flags;
        index  = offset;
        voff   = voffset;
-       count  = 1;
+       count  = size;
+}
+
+static void flush_page_range(void)     /* print the range still buffered, if any */
+{
+       show_page_range(0, 0, 0, 0);    /* size 0 never merges: pending range is emitted and the buffer left empty */
 }
 
 static void show_page(unsigned long voffset,
@@ -375,6 +382,8 @@ static void show_page(unsigned long voffset,
 {
        if (opt_pid)
                printf("%lx\t", voffset);
+       if (opt_file)
+               printf("%lu\t", voffset);
        printf("%lx\t%s\n", offset, page_flag_name(flags));
 }
 
@@ -565,7 +574,7 @@ static void add_page(unsigned long voffset,
                unpoison_page(offset);
 
        if (opt_list == 1)
-               show_page_range(voffset, offset, flags);
+               show_page_range(voffset, offset, 1, flags);
        else if (opt_list == 2)
                show_page(voffset, offset, flags);
 
@@ -667,7 +676,7 @@ static void walk_addr_ranges(void)
 
        for (i = 0; i < nr_addr_ranges; i++)
                if (!opt_pid)
-                       walk_pfn(0, opt_offset[i], opt_size[i], 0);
+                       walk_pfn(opt_offset[i], opt_offset[i], opt_size[i], 0);
                else
                        walk_task(opt_offset[i], opt_size[i]);
 
@@ -699,9 +708,7 @@ static void usage(void)
 "            -a|--addr    addr-spec     Walk a range of pages\n"
 "            -b|--bits    bits-spec     Walk pages with specified bits\n"
 "            -p|--pid     pid           Walk process address space\n"
-#if 0 /* planned features */
 "            -f|--file    filename      Walk file address space\n"
-#endif
 "            -l|--list                  Show page details in ranges\n"
 "            -L|--list-each             Show page details one by one\n"
 "            -N|--no-summary            Don't show summary info\n"
@@ -799,8 +806,130 @@ static void parse_pid(const char *str)
        fclose(file);
 }
 
+/*
+ * Format *t as local time using strftime("%c").
+ * If the time cannot be converted, or the result does not fit in buf,
+ * buf is set to "?" so that it is always a valid string.
+ */
+static void format_file_time(char *buf, size_t len, const time_t *t)
+{
+       struct tm *tm = localtime(t);
+
+       if (!tm || !strftime(buf, len, "%c", tm))
+               snprintf(buf, len, "?");
+}
+
+/*
+ * Print a two-line header describing one file: its name, inode number and
+ * size (in bytes and in pages), followed by its modification and access
+ * times together with their age in seconds.
+ *
+ * @name: path printed verbatim
+ * @st:   stat data the inode, size and timestamps are taken from
+ */
+static void show_file(const char *name, const struct stat *st)
+{
+       unsigned long long size = st->st_size;
+       char atime[64], mtime[64];
+       time_t now = time(NULL);
+
+       /* ino_t may be wider than unsigned int: print it in full */
+       printf("%s\tInode: %llu\tSize: %llu (%llu pages)\n",
+                       name, (unsigned long long)st->st_ino,
+                       size, (size + page_size - 1) / page_size);
+
+       format_file_time(atime, sizeof(atime), &st->st_atime);
+       format_file_time(mtime, sizeof(mtime), &st->st_mtime);
+
+       /* cast so the argument type matches %ld whatever time_t is */
+       printf("Modify: %s (%ld seconds ago)\nAccess: %s (%ld seconds ago)\n",
+                       mtime, (long)(now - st->st_mtime),
+                       atime, (long)(now - st->st_atime));
+}
+
+/*
+ * Walk the page cache of one file.
+ *
+ * The file is mapped in windows of at most PAGEMAP_BATCH pages.  For each
+ * window, mincore() tells which pages are cached; only those are touched so
+ * that they get page table entries, and the window is then looked up with
+ * pagemap_read().  Every page that has a non-zero PFN and readable flags is
+ * handed to add_page(), using its page index within the file as the virtual
+ * offset.  When listing is enabled, the file header is printed once, right
+ * before the first such page.
+ *
+ * @name: path of the file; used to open it and in error messages
+ * @st:   stat data of the file; st_size bounds the walk
+ *
+ * Failures of mmap/mincore/madvise/pagemap_read are reported via fatal().
+ *
+ * NOTE(review): the walk trusts st_size; presumably a file truncated after
+ * the stat could fault when touched -- confirm whether that needs handling.
+ */
+static void walk_file(const char *name, const struct stat *st)
+{
+       uint8_t vec[PAGEMAP_BATCH];
+       uint64_t buf[PAGEMAP_BATCH], flags;
+       unsigned long nr_pages, pfn, i;
+       int fd;
+       off_t off;
+       ssize_t len;
+       void *ptr;
+       int first = 1;
+
+       fd = checked_open(name, O_RDONLY|O_NOATIME|O_NOFOLLOW);
+
+       for (off = 0; off < st->st_size; off += len) {
+               /* pages left in the file, capped to one batch */
+               nr_pages = (st->st_size - off + page_size - 1) / page_size;
+               if (nr_pages > PAGEMAP_BATCH)
+                       nr_pages = PAGEMAP_BATCH;
+               len = nr_pages * page_size;
+
+               ptr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, off);
+               if (ptr == MAP_FAILED)
+                       fatal("mmap failed: %s\n", name);
+
+               /* determine cached pages */
+               if (mincore(ptr, len, vec))
+                       fatal("mincore failed: %s\n", name);
+
+               /* turn off readahead */
+               if (madvise(ptr, len, MADV_RANDOM))
+                       fatal("madvise failed: %s\n", name);
+
+               /* populate ptes */
+               for (i = 0; i < nr_pages ; i++) {
+                       if (vec[i] & 1)
+                               (void)*(volatile int *)
+                                       ((char *)ptr + i * page_size);
+               }
+
+               /* turn off harvesting reference bits */
+               if (madvise(ptr, len, MADV_SEQUENTIAL))
+                       fatal("madvise failed: %s\n", name);
+
+               if (pagemap_read(buf, (unsigned long)ptr / page_size,
+                                       nr_pages) != nr_pages)
+                       fatal("cannot read pagemap\n");
+
+               munmap(ptr, len);
+
+               for (i = 0; i < nr_pages; i++) {
+                       pfn = pagemap_pfn(buf[i]);
+                       if (!pfn)
+                               continue;
+                       if (!kpageflags_read(&flags, pfn, 1))
+                               continue;
+                       if (first && opt_list) {
+                               /* drain the previous file's range first */
+                               first = 0;
+                               flush_page_range();
+                               show_file(name, st);
+                       }
+                       add_page(off / page_size + i, pfn, flags, buf[i]);
+               }
+       }
+
+       close(fd);
+}
+
+/*
+ * nftw() callback used when walking a directory tree.
+ *
+ * Regular files are passed on to walk_file(); a directory that cannot be
+ * read is reported on stderr.  Every other entry type is ignored.
+ * Always returns 0 so that the tree walk carries on.
+ */
+int walk_tree(const char *name, const struct stat *st, int type, struct FTW *f)
+{
+       (void)f;
+
+       if (type == FTW_DNR)
+               fprintf(stderr, "cannot read dir: %s\n", name);
+       else if (type == FTW_F && S_ISREG(st->st_mode))
+               walk_file(name, st);
+
+       return 0;
+}
+
+/*
+ * Walk the page cache of opt_file.
+ *
+ * Opens the kpageflags file and this process's own pagemap, then walks
+ * opt_file directly if it is a regular file, or every entry below it via
+ * nftw() if it is a directory.  Any other file type is reported through
+ * fatal().  Both descriptors are closed again afterwards.
+ */
+static void walk_page_cache(void)
+{
+       struct stat st;
+
+       kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY);
+       pagemap_fd = checked_open("/proc/self/pagemap", O_RDONLY);
+
+       if (stat(opt_file, &st) != 0)
+               fatal("stat failed: %s\n", opt_file);
+
+       switch (st.st_mode & S_IFMT) {
+       case S_IFREG:
+               walk_file(opt_file, &st);
+               break;
+       case S_IFDIR:
+               /* do not follow symlinks and mountpoints */
+               if (nftw(opt_file, walk_tree, 64, FTW_MOUNT | FTW_PHYS) < 0)
+                       fatal("nftw failed: %s\n", opt_file);
+               break;
+       default:
+               fatal("unhandled file type: %s\n", opt_file);
+       }
+
+       close(kpageflags_fd);
+       close(pagemap_fd);
+}
+
 static void parse_file(const char *name)       /* record the path whose page cache is walked */
 {
+       opt_file = name;        /* the pointer is kept as-is; the string is not copied */
 }
 
 static void parse_addr_range(const char *optarg)
@@ -991,15 +1120,20 @@ int main(int argc, char *argv[])
 
        if (opt_list && opt_pid)
                printf("voffset\t");
+       if (opt_list && opt_file)
+               printf("foffset\t");
        if (opt_list == 1)
                printf("offset\tlen\tflags\n");
        if (opt_list == 2)
                printf("offset\tflags\n");
 
-       walk_addr_ranges();
+       if (opt_file)
+               walk_page_cache();
+       else
+               walk_addr_ranges();
 
        if (opt_list == 1)
-               show_page_range(0, 0, 0);  /* drain the buffer */
+               flush_page_range();
 
        if (opt_no_summary)
                return 0;