Merge tag 'xfs-reflink-for-linus-4.9-rc1' of git://git.kernel.org/pub/scm/linux/kerne...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 14 Oct 2016 03:28:22 +0000 (20:28 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 14 Oct 2016 03:28:22 +0000 (20:28 -0700)
    < XFS has gained super CoW powers! >
     ----------------------------------
            \   ^__^
             \  (oo)\_______
                (__)\       )\/\
                    ||----w |
                    ||     ||

Pull XFS support for shared data extents from Dave Chinner:
 "This is the second part of the XFS updates for this merge cycle.  This
  pullreq contains the new shared data extents feature for XFS.

  Given the complexity and size of this change I am expecting - like the
  addition of reverse mapping last cycle - that there will be some
  follow-up bug fixes and cleanups around the -rc3 stage for issues that
  I'm sure will show up once the code hits a wider userbase.

  What it is:

  At the most basic level we are simply adding shared data extents to
  XFS - i.e. a single extent on disk can now have multiple owners. To do
  this we have to add new on-disk features to both track the shared
  extents and the number of times they've been shared. This is done by
  the new "refcount" btree that sits in every allocation group. When we
  share or unshare an extent, this tree gets updated.

  Along with this new tree, the reverse mapping tree needs to be updated
  to track each owner of a shared extent. This also needs to be updated on
  every share/unshare operation. These interactions at extent allocation
  and freeing time have complex ordering and recovery constraints, so
  there's a significant amount of new intent-based transaction code to
  ensure that operations are performed atomically from both the runtime
  and integrity/crash recovery perspectives.

  We also need to break sharing when writes hit a shared extent - this
  is where the new copy-on-write implementation comes in. We allocate
  new storage and copy the original data along with the overwrite data
  into the new location. We only do this for data as we don't share
  metadata at all - each inode has its own metadata that tracks the
  shared data extents, the extents undergoing CoW and its own private
  extents.

  Of course, being XFS, nothing is simple - we use delayed allocation
  for CoW similar to how we use it for normal writes. ENOSPC is a
  significant issue here - we build on the reservation code added in
  4.8-rc1 with the reverse mapping feature to ensure we don't get
  spurious ENOSPC issues part way through a CoW operation. These
  mechanisms also help minimise fragmentation due to repeated CoW
  operations. To further reduce fragmentation overhead, we've also
  introduced a CoW extent size hint, which indicates how large a region
  we should allocate when we execute a CoW operation.

  With all this functionality in place, we can hook up .copy_file_range,
  .clone_file_range and .dedupe_file_range and we gain all the
  capabilities of reflink and other vfs provided functionality that
  enable manipulation of shared extents. We also added a fallocate mode
  that explicitly unshares a range of a file, which we implemented as an
  explicit CoW of all the shared extents in a file.

  As such, it's a huge chunk of new functionality with new on-disk
  format features and internal infrastructure. It warns at mount time as
  an experimental feature and that it may eat data (as we do with all
  new on-disk features until they stabilise). We have not released
  userspace support for it yet - userspace support currently requires
  download from Darrick's xfsprogs repo and build from source, so the
  access to this feature is really developer/tester only at this point.
  Initial userspace support will be released at the same time the kernel
  with this code in it is released.

  The new code causes 5-6 new failures with xfstests - these aren't
  serious functional failures but things like the output of tests changing
  slightly due to perturbations in layouts, space usage, etc. OTOH,
  we've added 150+ new tests to xfstests that specifically exercise this
  new functionality so it's got far better test coverage than any
  functionality we've previously added to XFS.

  Darrick has done a pretty amazing job getting us to this stage, and
  special mention also needs to go to Christoph (review, testing,
  improvements and bug fixes) and Brian (caught several intricate bugs
  during review) for the effort they've also put in.

  Summary:

   - unshare range (FALLOC_FL_UNSHARE) support for fallocate

   - copy-on-write extent size hints (FS_XFLAG_COWEXTSIZE) for fsxattr
     interface

   - shared extent support for XFS

   - copy-on-write support for shared extents

   - copy_file_range support

   - clone_file_range support (implements reflink)

   - dedupe_file_range support

   - defrag support for reverse mapping enabled filesystems"

* tag 'xfs-reflink-for-linus-4.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (71 commits)
  xfs: convert COW blocks to real blocks before unwritten extent conversion
  xfs: rework refcount cow recovery error handling
  xfs: clear reflink flag if setting realtime flag
  xfs: fix error initialization
  xfs: fix label inaccuracies
  xfs: remove isize check from unshare operation
  xfs: reduce stack usage of _reflink_clear_inode_flag
  xfs: check inode reflink flag before calling reflink functions
  xfs: implement swapext for rmap filesystems
  xfs: refactor swapext code
  xfs: various swapext cleanups
  xfs: recognize the reflink feature bit
  xfs: simulate per-AG reservations being critically low
  xfs: don't mix reflink and DAX mode for now
  xfs: check for invalid inode reflink flags
  xfs: set a default CoW extent size of 32 blocks
  xfs: convert unwritten status of reverse mappings for shared files
  xfs: use interval query for rmap alloc operations on shared files
  xfs: add shared rmap map/unmap/convert log item types
  xfs: increase log reservations for reflink
  ...

1  2 
fs/open.c
fs/xfs/xfs_file.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_trace.h
include/uapi/linux/fs.h

diff --combined fs/open.c
+++ b/fs/open.c
@@@ -68,7 -68,6 +68,7 @@@ int do_truncate(struct dentry *dentry, 
  long vfs_truncate(const struct path *path, loff_t length)
  {
        struct inode *inode;
 +      struct dentry *upperdentry;
        long error;
  
        inode = path->dentry->d_inode;
        if (IS_APPEND(inode))
                goto mnt_drop_write_and_out;
  
 -      error = get_write_access(inode);
 +      /*
 +       * If this is an overlayfs then do as if opening the file so we get
 +       * write access on the upper inode, not on the overlay inode.  For
 +       * non-overlay filesystems d_real() is an identity function.
 +       */
 +      upperdentry = d_real(path->dentry, NULL, O_WRONLY);
 +      error = PTR_ERR(upperdentry);
 +      if (IS_ERR(upperdentry))
 +              goto mnt_drop_write_and_out;
 +
 +      error = get_write_access(upperdentry->d_inode);
        if (error)
                goto mnt_drop_write_and_out;
  
                error = do_truncate(path->dentry, length, 0, NULL);
  
  put_write_and_out:
 -      put_write_access(inode);
 +      put_write_access(upperdentry->d_inode);
  mnt_drop_write_and_out:
        mnt_drop_write(path->mnt);
  out:
@@@ -267,6 -256,11 +267,11 @@@ int vfs_fallocate(struct file *file, in
            (mode & ~FALLOC_FL_INSERT_RANGE))
                return -EINVAL;
  
+       /* Unshare range should only be used with allocate mode. */
+       if ((mode & FALLOC_FL_UNSHARE_RANGE) &&
+           (mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE)))
+               return -EINVAL;
        if (!(file->f_mode & FMODE_WRITE))
                return -EBADF;
  
         * Let individual file system decide if it supports preallocation
         * for directories or not.
         */
 -      if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
 +      if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
 +          !S_ISBLK(inode->i_mode))
                return -ENODEV;
  
        /* Check for wrap through zero too */
@@@ -738,7 -731,7 +743,7 @@@ static int do_dentry_open(struct file *
        if (error)
                goto cleanup_all;
  
 -      error = break_lease(inode, f->f_flags);
 +      error = break_lease(locks_inode(f), f->f_flags);
        if (error)
                goto cleanup_all;
  
diff --combined fs/xfs/xfs_file.c
@@@ -38,6 -38,7 +38,7 @@@
  #include "xfs_icache.h"
  #include "xfs_pnfs.h"
  #include "xfs_iomap.h"
+ #include "xfs_reflink.h"
  
  #include <linux/dcache.h>
  #include <linux/falloc.h>
@@@ -319,7 -320,7 +320,7 @@@ xfs_file_dio_aio_read
        data = *to;
        ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
                        xfs_get_blocks_direct, NULL, NULL, 0);
 -      if (ret > 0) {
 +      if (ret >= 0) {
                iocb->ki_pos += ret;
                iov_iter_advance(to, ret);
        }
@@@ -393,6 -394,45 +394,6 @@@ xfs_file_read_iter
        return ret;
  }
  
 -STATIC ssize_t
 -xfs_file_splice_read(
 -      struct file             *infilp,
 -      loff_t                  *ppos,
 -      struct pipe_inode_info  *pipe,
 -      size_t                  count,
 -      unsigned int            flags)
 -{
 -      struct xfs_inode        *ip = XFS_I(infilp->f_mapping->host);
 -      ssize_t                 ret;
 -
 -      XFS_STATS_INC(ip->i_mount, xs_read_calls);
 -
 -      if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 -              return -EIO;
 -
 -      trace_xfs_file_splice_read(ip, count, *ppos);
 -
 -      /*
 -       * DAX inodes cannot ues the page cache for splice, so we have to push
 -       * them through the VFS IO path. This means it goes through
 -       * ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we
 -       * cannot lock the splice operation at this level for DAX inodes.
 -       */
 -      if (IS_DAX(VFS_I(ip))) {
 -              ret = default_file_splice_read(infilp, ppos, pipe, count,
 -                                             flags);
 -              goto out;
 -      }
 -
 -      xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
 -      ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
 -      xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 -out:
 -      if (ret > 0)
 -              XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
 -      return ret;
 -}
 -
  /*
   * Zero any on disk space between the current EOF and the new, larger EOF.
   *
@@@ -634,6 -674,13 +635,13 @@@ xfs_file_dio_aio_write
  
        trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
  
+       /* If this is a block-aligned directio CoW, remap immediately. */
+       if (xfs_is_reflink_inode(ip) && !unaligned_io) {
+               ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count);
+               if (ret)
+                       goto out;
+       }
        data = *from;
        ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
                        xfs_get_blocks_direct, xfs_end_io_direct_write,
@@@ -735,6 -782,9 +743,9 @@@ write_retry
                enospc = xfs_inode_free_quota_eofblocks(ip);
                if (enospc)
                        goto write_retry;
+               enospc = xfs_inode_free_quota_cowblocks(ip);
+               if (enospc)
+                       goto write_retry;
        } else if (ret == -ENOSPC && !enospc) {
                struct xfs_eofblocks eofb = {0};
  
@@@ -774,10 -824,20 +785,20 @@@ xfs_file_write_iter
  
        if (IS_DAX(inode))
                ret = xfs_file_dax_write(iocb, from);
-       else if (iocb->ki_flags & IOCB_DIRECT)
+       else if (iocb->ki_flags & IOCB_DIRECT) {
+               /*
+                * Allow a directio write to fall back to a buffered
+                * write *only* in the case that we're doing a reflink
+                * CoW.  In all other directio scenarios we do not
+                * allow an operation to fall back to buffered mode.
+                */
                ret = xfs_file_dio_aio_write(iocb, from);
-       else
+               if (ret == -EREMCHG)
+                       goto buffered;
+       } else {
+ buffered:
                ret = xfs_file_buffered_aio_write(iocb, from);
+       }
  
        if (ret > 0) {
                XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
  #define       XFS_FALLOC_FL_SUPPORTED                                         \
                (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
                 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |      \
-                FALLOC_FL_INSERT_RANGE)
+                FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)
  
  STATIC long
  xfs_file_fallocate(
  
                if (mode & FALLOC_FL_ZERO_RANGE)
                        error = xfs_zero_file_space(ip, offset, len);
-               else
+               else {
+                       if (mode & FALLOC_FL_UNSHARE_RANGE) {
+                               error = xfs_reflink_unshare(ip, offset, len);
+                               if (error)
+                                       goto out_unlock;
+                       }
                        error = xfs_alloc_file_space(ip, offset, len,
                                                     XFS_BMAPI_PREALLOC);
+               }
                if (error)
                        goto out_unlock;
        }
  
                iattr.ia_valid = ATTR_SIZE;
                iattr.ia_size = new_size;
 -              error = xfs_setattr_size(ip, &iattr);
 +              error = xfs_vn_setattr_size(file_dentry(file), &iattr);
                if (error)
                        goto out_unlock;
        }
@@@ -920,6 -986,189 +947,189 @@@ out_unlock
        return error;
  }
  
+ /*
+  * Flush all file writes out to disk.
+  */
+ static int
+ xfs_file_wait_for_io(
+       struct inode    *inode,
+       loff_t          offset,
+       size_t          len)
+ {
+       loff_t          rounding;
+       loff_t          ioffset;
+       loff_t          iendoffset;
+       loff_t          bs;
+       int             ret;
+       bs = inode->i_sb->s_blocksize;
+       inode_dio_wait(inode);
+       rounding = max_t(xfs_off_t, bs, PAGE_SIZE);
+       ioffset = round_down(offset, rounding);
+       iendoffset = round_up(offset + len, rounding) - 1;
+       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+                                          iendoffset);
+       return ret;
+ }
+ /* Hook up to the VFS reflink function */
+ STATIC int
+ xfs_file_share_range(
+       struct file     *file_in,
+       loff_t          pos_in,
+       struct file     *file_out,
+       loff_t          pos_out,
+       u64             len,
+       bool            is_dedupe)
+ {
+       struct inode    *inode_in;
+       struct inode    *inode_out;
+       ssize_t         ret;
+       loff_t          bs;
+       loff_t          isize;
+       int             same_inode;
+       loff_t          blen;
+       unsigned int    flags = 0;
+       inode_in = file_inode(file_in);
+       inode_out = file_inode(file_out);
+       bs = inode_out->i_sb->s_blocksize;
+       /* Don't touch certain kinds of inodes */
+       if (IS_IMMUTABLE(inode_out))
+               return -EPERM;
+       if (IS_SWAPFILE(inode_in) ||
+           IS_SWAPFILE(inode_out))
+               return -ETXTBSY;
+       /* Reflink only works within this filesystem. */
+       if (inode_in->i_sb != inode_out->i_sb)
+               return -EXDEV;
+       same_inode = (inode_in->i_ino == inode_out->i_ino);
+       /* Don't reflink dirs, pipes, sockets... */
+       if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+               return -EISDIR;
+       if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
+               return -EINVAL;
+       if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+               return -EINVAL;
+       /* Don't share DAX file data for now. */
+       if (IS_DAX(inode_in) || IS_DAX(inode_out))
+               return -EINVAL;
+       /* Are we going all the way to the end? */
+       isize = i_size_read(inode_in);
+       if (isize == 0)
+               return 0;
+       if (len == 0)
+               len = isize - pos_in;
+       /* Ensure offsets don't wrap and the input is inside i_size */
+       if (pos_in + len < pos_in || pos_out + len < pos_out ||
+           pos_in + len > isize)
+               return -EINVAL;
+       /* Don't allow dedupe past EOF in the dest file */
+       if (is_dedupe) {
+               loff_t  disize;
+               disize = i_size_read(inode_out);
+               if (pos_out >= disize || pos_out + len > disize)
+                       return -EINVAL;
+       }
+       /* If we're linking to EOF, continue to the block boundary. */
+       if (pos_in + len == isize)
+               blen = ALIGN(isize, bs) - pos_in;
+       else
+               blen = len;
+       /* Only reflink if we're aligned to block boundaries */
+       if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
+           !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
+               return -EINVAL;
+       /* Don't allow overlapped reflink within the same file */
+       if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen)
+               return -EINVAL;
+       /* Wait for the completion of any pending IOs on srcfile */
+       ret = xfs_file_wait_for_io(inode_in, pos_in, len);
+       if (ret)
+               goto out;
+       ret = xfs_file_wait_for_io(inode_out, pos_out, len);
+       if (ret)
+               goto out;
+       if (is_dedupe)
+               flags |= XFS_REFLINK_DEDUPE;
+       ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out),
+                       pos_out, len, flags);
+       if (ret < 0)
+               goto out;
+ out:
+       return ret;
+ }
+ STATIC ssize_t
+ xfs_file_copy_range(
+       struct file     *file_in,
+       loff_t          pos_in,
+       struct file     *file_out,
+       loff_t          pos_out,
+       size_t          len,
+       unsigned int    flags)
+ {
+       int             error;
+       error = xfs_file_share_range(file_in, pos_in, file_out, pos_out,
+                                    len, false);
+       if (error)
+               return error;
+       return len;
+ }
+ STATIC int
+ xfs_file_clone_range(
+       struct file     *file_in,
+       loff_t          pos_in,
+       struct file     *file_out,
+       loff_t          pos_out,
+       u64             len)
+ {
+       return xfs_file_share_range(file_in, pos_in, file_out, pos_out,
+                                    len, false);
+ }
+ #define XFS_MAX_DEDUPE_LEN    (16 * 1024 * 1024)
+ STATIC ssize_t
+ xfs_file_dedupe_range(
+       struct file     *src_file,
+       u64             loff,
+       u64             len,
+       struct file     *dst_file,
+       u64             dst_loff)
+ {
+       int             error;
+       /*
+        * Limit the total length we will dedupe for each operation.
+        * This is intended to bound the total time spent in this
+        * ioctl to something sane.
+        */
+       if (len > XFS_MAX_DEDUPE_LEN)
+               len = XFS_MAX_DEDUPE_LEN;
+       error = xfs_file_share_range(src_file, loff, dst_file, dst_loff,
+                                    len, true);
+       if (error)
+               return error;
+       return len;
+ }
  
  STATIC int
  xfs_file_open(
@@@ -1569,7 -1818,7 +1779,7 @@@ const struct file_operations xfs_file_o
        .llseek         = xfs_file_llseek,
        .read_iter      = xfs_file_read_iter,
        .write_iter     = xfs_file_write_iter,
 -      .splice_read    = xfs_file_splice_read,
 +      .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .unlocked_ioctl = xfs_file_ioctl,
  #ifdef CONFIG_COMPAT
        .open           = xfs_file_open,
        .release        = xfs_file_release,
        .fsync          = xfs_file_fsync,
 +      .get_unmapped_area = thp_get_unmapped_area,
        .fallocate      = xfs_file_fallocate,
+       .copy_file_range = xfs_file_copy_range,
+       .clone_file_range = xfs_file_clone_range,
+       .dedupe_file_range = xfs_file_dedupe_range,
  };
  
  const struct file_operations xfs_dir_file_operations = {
diff --combined fs/xfs/xfs_inode.c
@@@ -49,6 -49,7 +49,7 @@@
  #include "xfs_trans_priv.h"
  #include "xfs_log.h"
  #include "xfs_bmap_btree.h"
+ #include "xfs_reflink.h"
  
  kmem_zone_t *xfs_inode_zone;
  
@@@ -76,6 -77,29 +77,29 @@@ xfs_get_extsz_hint
        return 0;
  }
  
+ /*
+  * Helper function to extract CoW extent size hint from inode.
+  * Between the extent size hint and the CoW extent size hint, we
+  * return the greater of the two.  If the value is zero (automatic),
+  * use the default size.
+  */
+ xfs_extlen_t
+ xfs_get_cowextsz_hint(
+       struct xfs_inode        *ip)
+ {
+       xfs_extlen_t            a, b;
+       a = 0;
+       if (ip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)
+               a = ip->i_d.di_cowextsize;
+       b = xfs_get_extsz_hint(ip);
+       a = max(a, b);
+       if (a == 0)
+               return XFS_DEFAULT_COWEXTSZ_HINT;
+       return a;
+ }
  /*
   * These two are wrapper routines around the xfs_ilock() routine used to
   * centralize some grungy code.  They are used in places that wish to lock the
@@@ -651,6 -675,8 +675,8 @@@ _xfs_dic2xflags
        if (di_flags2 & XFS_DIFLAG2_ANY) {
                if (di_flags2 & XFS_DIFLAG2_DAX)
                        flags |= FS_XFLAG_DAX;
+               if (di_flags2 & XFS_DIFLAG2_COWEXTSIZE)
+                       flags |= FS_XFLAG_COWEXTSIZE;
        }
  
        if (has_attr)
@@@ -821,7 -847,7 +847,7 @@@ xfs_ialloc
        ip->i_d.di_nextents = 0;
        ASSERT(ip->i_d.di_nblocks == 0);
  
 -      tv = current_fs_time(mp->m_super);
 +      tv = current_time(inode);
        inode->i_mtime = tv;
        inode->i_atime = tv;
        inode->i_ctime = tv;
        if (ip->i_d.di_version == 3) {
                inode->i_version = 1;
                ip->i_d.di_flags2 = 0;
+               ip->i_d.di_cowextsize = 0;
                ip->i_d.di_crtime.t_sec = (__int32_t)tv.tv_sec;
                ip->i_d.di_crtime.t_nsec = (__int32_t)tv.tv_nsec;
        }
                        ip->i_d.di_flags |= di_flags;
                        ip->i_d.di_flags2 |= di_flags2;
                }
+               if (pip &&
+                   (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY) &&
+                   pip->i_d.di_version == 3 &&
+                   ip->i_d.di_version == 3) {
+                       if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) {
+                               ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
+                               ip->i_d.di_cowextsize = pip->i_d.di_cowextsize;
+                       }
+               }
                /* FALLTHROUGH */
        case S_IFLNK:
                ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
@@@ -1586,6 -1622,20 +1622,20 @@@ xfs_itruncate_extents
                        goto out;
        }
  
+       /* Remove all pending CoW reservations. */
+       error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block,
+                       last_block);
+       if (error)
+               goto out;
+       /*
+        * Clear the reflink flag if we truncated everything.
+        */
+       if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) {
+               ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+               xfs_inode_clear_cowblocks_tag(ip);
+       }
        /*
         * Always re-log the inode so that our permanent transaction can keep
         * on rolling it forward in the log.
@@@ -1710,7 -1760,7 +1760,7 @@@ xfs_inactive_truncate
        /*
         * Log the inode size first to prevent stale data exposure in the event
         * of a system crash before the truncate completes. See the related
 -       * comment in xfs_setattr_size() for details.
 +       * comment in xfs_vn_setattr_size() for details.
         */
        ip->i_d.di_size = 0;
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@@ -1850,6 -1900,7 +1900,7 @@@ xfs_inactive
        }
  
        mp = ip->i_mount;
+       ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY));
  
        /* If this is a read-only mount, don't do this (would generate I/O) */
        if (mp->m_flags & XFS_MOUNT_RDONLY)
diff --combined fs/xfs/xfs_ioctl.c
@@@ -720,7 -720,7 +720,7 @@@ xfs_ioc_space
                iattr.ia_valid = ATTR_SIZE;
                iattr.ia_size = bf->l_start;
  
 -              error = xfs_setattr_size(ip, &iattr);
 +              error = xfs_vn_setattr_size(file_dentry(filp), &iattr);
                break;
        default:
                ASSERT(0);
@@@ -903,6 -903,8 +903,8 @@@ xfs_ioc_fsgetxattr
        xfs_ilock(ip, XFS_ILOCK_SHARED);
        fa.fsx_xflags = xfs_ip2xflags(ip);
        fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
+       fa.fsx_cowextsize = ip->i_d.di_cowextsize <<
+                       ip->i_mount->m_sb.sb_blocklog;
        fa.fsx_projid = xfs_get_projid(ip);
  
        if (attr) {
@@@ -973,12 -975,13 +975,13 @@@ xfs_set_diflags
        if (ip->i_d.di_version < 3)
                return;
  
-       di_flags2 = 0;
+       di_flags2 = (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK);
        if (xflags & FS_XFLAG_DAX)
                di_flags2 |= XFS_DIFLAG2_DAX;
+       if (xflags & FS_XFLAG_COWEXTSIZE)
+               di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
  
        ip->i_d.di_flags2 = di_flags2;
  }
  
  STATIC void
@@@ -1031,6 -1034,14 +1034,14 @@@ xfs_ioctl_setattr_xflags
                        return -EINVAL;
        }
  
+       /* Clear reflink if we are actually able to set the rt flag. */
+       if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip))
+               ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+       /* Don't allow us to set DAX mode for a reflinked file for now. */
+       if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip))
+               return -EINVAL;
        /*
         * Can't modify an immutable/append-only file unless
         * we have appropriate permission.
@@@ -1219,6 -1230,56 +1230,56 @@@ xfs_ioctl_setattr_check_extsize
        return 0;
  }
  
+ /*
+  * CoW extent size hint validation rules are:
+  *
+  * 1. CoW extent size hint can only be set if reflink is enabled on the fs.
+  *    The inode does not have to have any shared blocks, but it must be a v3.
+  * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
+  *    for a directory, the hint is propagated to new files.
+  * 3. Can be changed on files & directories at any time.
+  * 4. CoW extsize hint of 0 turns off hints, clears inode flags.
+  * 5. Extent size must be a multiple of the appropriate block size.
+  * 6. The extent size hint must be limited to half the AG size to avoid
+  *    alignment extending the extent beyond the limits of the AG.
+  */
+ static int
+ xfs_ioctl_setattr_check_cowextsize(
+       struct xfs_inode        *ip,
+       struct fsxattr          *fa)
+ {
+       struct xfs_mount        *mp = ip->i_mount;
+       if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE))
+               return 0;
+       if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb) ||
+           ip->i_d.di_version != 3)
+               return -EINVAL;
+       if (!S_ISREG(VFS_I(ip)->i_mode) && !S_ISDIR(VFS_I(ip)->i_mode))
+               return -EINVAL;
+       if (fa->fsx_cowextsize != 0) {
+               xfs_extlen_t    size;
+               xfs_fsblock_t   cowextsize_fsb;
+               cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize);
+               if (cowextsize_fsb > MAXEXTLEN)
+                       return -EINVAL;
+               size = mp->m_sb.sb_blocksize;
+               if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2)
+                       return -EINVAL;
+               if (fa->fsx_cowextsize % size)
+                       return -EINVAL;
+       } else
+               fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE;
+       return 0;
+ }
  static int
  xfs_ioctl_setattr_check_projid(
        struct xfs_inode        *ip,
@@@ -1311,6 -1372,10 +1372,10 @@@ xfs_ioctl_setattr
        if (code)
                goto error_trans_cancel;
  
+       code = xfs_ioctl_setattr_check_cowextsize(ip, fa);
+       if (code)
+               goto error_trans_cancel;
        code = xfs_ioctl_setattr_xflags(tp, ip, fa);
        if (code)
                goto error_trans_cancel;
                ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
        else
                ip->i_d.di_extsize = 0;
+       if (ip->i_d.di_version == 3 &&
+           (ip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
+               ip->i_d.di_cowextsize = fa->fsx_cowextsize >>
+                               mp->m_sb.sb_blocklog;
+       else
+               ip->i_d.di_cowextsize = 0;
  
        code = xfs_trans_commit(tp);
  
diff --combined fs/xfs/xfs_iops.c
@@@ -542,28 -542,6 +542,28 @@@ xfs_setattr_time
                inode->i_mtime = iattr->ia_mtime;
  }
  
 +static int
 +xfs_vn_change_ok(
 +      struct dentry   *dentry,
 +      struct iattr    *iattr)
 +{
 +      struct xfs_mount        *mp = XFS_I(d_inode(dentry))->i_mount;
 +
 +      if (mp->m_flags & XFS_MOUNT_RDONLY)
 +              return -EROFS;
 +
 +      if (XFS_FORCED_SHUTDOWN(mp))
 +              return -EIO;
 +
 +      return setattr_prepare(dentry, iattr);
 +}
 +
 +/*
 + * Set non-size attributes of an inode.
 + *
 + * Caution: The caller of this function is responsible for calling
 + * setattr_prepare() or otherwise verifying the change is fine.
 + */
  int
  xfs_setattr_nonsize(
        struct xfs_inode        *ip,
        struct xfs_dquot        *udqp = NULL, *gdqp = NULL;
        struct xfs_dquot        *olddquot1 = NULL, *olddquot2 = NULL;
  
 -      trace_xfs_setattr(ip);
 -
 -      /* If acls are being inherited, we already have this checked */
 -      if (!(flags & XFS_ATTR_NOACL)) {
 -              if (mp->m_flags & XFS_MOUNT_RDONLY)
 -                      return -EROFS;
 -
 -              if (XFS_FORCED_SHUTDOWN(mp))
 -                      return -EIO;
 -
 -              error = inode_change_ok(inode, iattr);
 -              if (error)
 -                      return error;
 -      }
 -
        ASSERT((mask & ATTR_SIZE) == 0);
  
        /*
@@@ -750,27 -743,8 +750,27 @@@ out_dqrele
        return error;
  }
  
 +int
 +xfs_vn_setattr_nonsize(
 +      struct dentry           *dentry,
 +      struct iattr            *iattr)
 +{
 +      struct xfs_inode        *ip = XFS_I(d_inode(dentry));
 +      int error;
 +
 +      trace_xfs_setattr(ip);
 +
 +      error = xfs_vn_change_ok(dentry, iattr);
 +      if (error)
 +              return error;
 +      return xfs_setattr_nonsize(ip, iattr, 0);
 +}
 +
  /*
   * Truncate file.  Must have write permission and not be a directory.
 + *
 + * Caution: The caller of this function is responsible for calling
 + * setattr_prepare() or otherwise verifying the change is fine.
   */
  int
  xfs_setattr_size(
        uint                    lock_flags = 0;
        bool                    did_zeroing = false;
  
 -      trace_xfs_setattr(ip);
 -
 -      if (mp->m_flags & XFS_MOUNT_RDONLY)
 -              return -EROFS;
 -
 -      if (XFS_FORCED_SHUTDOWN(mp))
 -              return -EIO;
 -
 -      error = inode_change_ok(inode, iattr);
 -      if (error)
 -              return error;
 -
        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
        ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
        ASSERT(S_ISREG(inode->i_mode));
        if (newsize != oldsize &&
            !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
                iattr->ia_ctime = iattr->ia_mtime =
 -                      current_fs_time(inode->i_sb);
 +                      current_time(inode);
                iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
        }
  
@@@ -956,32 -942,16 +956,32 @@@ out_trans_cancel
        goto out_unlock;
  }
  
 +int
 +xfs_vn_setattr_size(
 +      struct dentry           *dentry,
 +      struct iattr            *iattr)
 +{
 +      struct xfs_inode        *ip = XFS_I(d_inode(dentry));
 +      int error;
 +
 +      trace_xfs_setattr(ip);
 +
 +      error = xfs_vn_change_ok(dentry, iattr);
 +      if (error)
 +              return error;
 +      return xfs_setattr_size(ip, iattr);
 +}
 +
  STATIC int
  xfs_vn_setattr(
        struct dentry           *dentry,
        struct iattr            *iattr)
  {
 -      struct xfs_inode        *ip = XFS_I(d_inode(dentry));
        int                     error;
  
        if (iattr->ia_valid & ATTR_SIZE) {
 -              uint            iolock = XFS_IOLOCK_EXCL;
 +              struct xfs_inode        *ip = XFS_I(d_inode(dentry));
 +              uint                    iolock = XFS_IOLOCK_EXCL;
  
                xfs_ilock(ip, iolock);
                error = xfs_break_layouts(d_inode(dentry), &iolock, true);
                        xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
                        iolock |= XFS_MMAPLOCK_EXCL;
  
 -                      error = xfs_setattr_size(ip, iattr);
 +                      error = xfs_vn_setattr_size(dentry, iattr);
                }
                xfs_iunlock(ip, iolock);
        } else {
 -              error = xfs_setattr_nonsize(ip, iattr, 0);
 +              error = xfs_vn_setattr_nonsize(dentry, iattr);
        }
  
        return error;
@@@ -1066,6 -1036,9 +1066,6 @@@ static const struct inode_operations xf
        .set_acl                = xfs_set_acl,
        .getattr                = xfs_vn_getattr,
        .setattr                = xfs_vn_setattr,
 -      .setxattr               = generic_setxattr,
 -      .getxattr               = generic_getxattr,
 -      .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
        .fiemap                 = xfs_vn_fiemap,
        .update_time            = xfs_vn_update_time,
@@@ -1086,11 -1059,14 +1086,11 @@@ static const struct inode_operations xf
         */
        .rmdir                  = xfs_vn_unlink,
        .mknod                  = xfs_vn_mknod,
 -      .rename2                = xfs_vn_rename,
 +      .rename                 = xfs_vn_rename,
        .get_acl                = xfs_get_acl,
        .set_acl                = xfs_set_acl,
        .getattr                = xfs_vn_getattr,
        .setattr                = xfs_vn_setattr,
 -      .setxattr               = generic_setxattr,
 -      .getxattr               = generic_getxattr,
 -      .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
        .update_time            = xfs_vn_update_time,
        .tmpfile                = xfs_vn_tmpfile,
@@@ -1111,11 -1087,14 +1111,11 @@@ static const struct inode_operations xf
         */
        .rmdir                  = xfs_vn_unlink,
        .mknod                  = xfs_vn_mknod,
 -      .rename2                = xfs_vn_rename,
 +      .rename                 = xfs_vn_rename,
        .get_acl                = xfs_get_acl,
        .set_acl                = xfs_set_acl,
        .getattr                = xfs_vn_getattr,
        .setattr                = xfs_vn_setattr,
 -      .setxattr               = generic_setxattr,
 -      .getxattr               = generic_getxattr,
 -      .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
        .update_time            = xfs_vn_update_time,
        .tmpfile                = xfs_vn_tmpfile,
@@@ -1126,6 -1105,9 +1126,6 @@@ static const struct inode_operations xf
        .get_link               = xfs_vn_get_link,
        .getattr                = xfs_vn_getattr,
        .setattr                = xfs_vn_setattr,
 -      .setxattr               = generic_setxattr,
 -      .getxattr               = generic_getxattr,
 -      .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
        .update_time            = xfs_vn_update_time,
  };
@@@ -1135,6 -1117,9 +1135,6 @@@ static const struct inode_operations xf
        .get_link               = xfs_vn_get_link_inline,
        .getattr                = xfs_vn_getattr,
        .setattr                = xfs_vn_setattr,
 -      .setxattr               = generic_setxattr,
 -      .getxattr               = generic_getxattr,
 -      .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
        .update_time            = xfs_vn_update_time,
  };
@@@ -1159,6 -1144,7 +1159,7 @@@ xfs_diflags_to_iflags
                inode->i_flags |= S_NOATIME;
        if (S_ISREG(inode->i_mode) &&
            ip->i_mount->m_sb.sb_blocksize == PAGE_SIZE &&
+           !xfs_is_reflink_inode(ip) &&
            (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
             ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))
                inode->i_flags |= S_DAX;
diff --combined fs/xfs/xfs_trace.h
@@@ -39,6 -39,7 +39,7 @@@ struct xfs_buf_log_format
  struct xfs_inode_log_format;
  struct xfs_bmbt_irec;
  struct xfs_btree_cur;
+ struct xfs_refcount_irec;
  
  DECLARE_EVENT_CLASS(xfs_attr_list_class,
        TP_PROTO(struct xfs_attr_list_context *ctx),
@@@ -135,6 -136,8 +136,8 @@@ DEFINE_PERAG_REF_EVENT(xfs_perag_set_re
  DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
  DEFINE_PERAG_REF_EVENT(xfs_perag_set_eofblocks);
  DEFINE_PERAG_REF_EVENT(xfs_perag_clear_eofblocks);
+ DEFINE_PERAG_REF_EVENT(xfs_perag_set_cowblocks);
+ DEFINE_PERAG_REF_EVENT(xfs_perag_clear_cowblocks);
  
  DECLARE_EVENT_CLASS(xfs_ag_class,
        TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno),
@@@ -268,10 -271,10 +271,10 @@@ DECLARE_EVENT_CLASS(xfs_bmap_class
                __field(unsigned long, caller_ip)
        ),
        TP_fast_assign(
-               struct xfs_ifork        *ifp = (state & BMAP_ATTRFORK) ?
-                                               ip->i_afp : &ip->i_df;
+               struct xfs_ifork        *ifp;
                struct xfs_bmbt_irec    r;
  
+               ifp = xfs_iext_state_to_fork(ip, state);
                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
                __entry->dev = VFS_I(ip)->i_sb->s_dev;
                __entry->ino = ip->i_ino;
@@@ -686,6 -689,9 +689,9 @@@ DEFINE_INODE_EVENT(xfs_dquot_dqdetach)
  DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
  DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
  DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
+ DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag);
+ DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag);
+ DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid);
  
  DEFINE_INODE_EVENT(xfs_filemap_fault);
  DEFINE_INODE_EVENT(xfs_filemap_pmd_fault);
@@@ -1170,6 -1176,7 +1176,6 @@@ DEFINE_RW_EVENT(xfs_file_dax_read)
  DEFINE_RW_EVENT(xfs_file_buffered_write);
  DEFINE_RW_EVENT(xfs_file_direct_write);
  DEFINE_RW_EVENT(xfs_file_dax_write);
 -DEFINE_RW_EVENT(xfs_file_splice_read);
  
  DECLARE_EVENT_CLASS(xfs_page_class,
        TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
@@@ -2581,10 -2588,20 +2587,20 @@@ DEFINE_RMAPBT_EVENT(xfs_rmap_delete)
  DEFINE_AG_ERROR_EVENT(xfs_rmap_insert_error);
  DEFINE_AG_ERROR_EVENT(xfs_rmap_delete_error);
  DEFINE_AG_ERROR_EVENT(xfs_rmap_update_error);
+ DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_candidate);
+ DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_query);
+ DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range_candidate);
+ DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range);
  DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range_result);
  DEFINE_RMAPBT_EVENT(xfs_rmap_find_right_neighbor_result);
  DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_result);
  
+ /* deferred bmbt updates */
+ #define DEFINE_BMAP_DEFERRED_EVENT    DEFINE_RMAP_DEFERRED_EVENT
+ DEFINE_BMAP_DEFERRED_EVENT(xfs_bmap_defer);
+ DEFINE_BMAP_DEFERRED_EVENT(xfs_bmap_deferred);
  /* per-AG reservation */
  DECLARE_EVENT_CLASS(xfs_ag_resv_class,
        TP_PROTO(struct xfs_perag *pag, enum xfs_ag_resv_type resv,
@@@ -2639,6 -2656,728 +2655,728 @@@ DEFINE_AG_RESV_EVENT(xfs_ag_resv_needed
  DEFINE_AG_ERROR_EVENT(xfs_ag_resv_free_error);
  DEFINE_AG_ERROR_EVENT(xfs_ag_resv_init_error);
  
+ /* refcount tracepoint classes */
+ /* reuse the discard trace class for agbno/aglen-based traces */
+ #define DEFINE_AG_EXTENT_EVENT(name) DEFINE_DISCARD_EVENT(name)
+ /* ag btree lookup class: agno/agbno + lookup dir; no "\n" in TP_printk fmt */
+ #define XFS_AG_BTREE_CMP_FORMAT_STR \
+       { XFS_LOOKUP_EQ,        "eq" }, \
+       { XFS_LOOKUP_LE,        "le" }, \
+       { XFS_LOOKUP_GE,        "ge" }
+ DECLARE_EVENT_CLASS(xfs_ag_btree_lookup_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                xfs_agblock_t agbno, xfs_lookup_t dir),
+       TP_ARGS(mp, agno, agbno, dir),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_lookup_t, dir)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agbno = agbno;
+               __entry->dir = dir;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u cmp %s(%d)",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __print_symbolic(__entry->dir, XFS_AG_BTREE_CMP_FORMAT_STR),
+                 __entry->dir)
+ )
+ #define DEFINE_AG_BTREE_LOOKUP_EVENT(name) \
+ DEFINE_EVENT(xfs_ag_btree_lookup_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                xfs_agblock_t agbno, xfs_lookup_t dir), \
+       TP_ARGS(mp, agno, agbno, dir))
+ /* single-rcext class: one refcount record; no "\n" in TP_printk fmt */
+ DECLARE_EVENT_CLASS(xfs_refcount_extent_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                struct xfs_refcount_irec *irec),
+       TP_ARGS(mp, agno, irec),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, startblock)
+               __field(xfs_extlen_t, blockcount)
+               __field(xfs_nlink_t, refcount)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->startblock = irec->rc_startblock;
+               __entry->blockcount = irec->rc_blockcount;
+               __entry->refcount = irec->rc_refcount;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->startblock,
+                 __entry->blockcount,
+                 __entry->refcount)
+ )
+ #define DEFINE_REFCOUNT_EXTENT_EVENT(name) \
+ DEFINE_EVENT(xfs_refcount_extent_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                struct xfs_refcount_irec *irec), \
+       TP_ARGS(mp, agno, irec))
+ /* single-rcext + agbno class: one record and a block; no "\n" in fmt */
+ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                struct xfs_refcount_irec *irec, xfs_agblock_t agbno),
+       TP_ARGS(mp, agno, irec, agbno),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, startblock)
+               __field(xfs_extlen_t, blockcount)
+               __field(xfs_nlink_t, refcount)
+               __field(xfs_agblock_t, agbno)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->startblock = irec->rc_startblock;
+               __entry->blockcount = irec->rc_blockcount;
+               __entry->refcount = irec->rc_refcount;
+               __entry->agbno = agbno;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u @ agbno %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->startblock,
+                 __entry->blockcount,
+                 __entry->refcount,
+                 __entry->agbno)
+ )
+ #define DEFINE_REFCOUNT_EXTENT_AT_EVENT(name) \
+ DEFINE_EVENT(xfs_refcount_extent_at_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                struct xfs_refcount_irec *irec, xfs_agblock_t agbno), \
+       TP_ARGS(mp, agno, irec, agbno))
+ /* double-rcext class: two refcount records; no "\n" in TP_printk fmt */
+ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2),
+       TP_ARGS(mp, agno, i1, i2),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, i1_startblock)
+               __field(xfs_extlen_t, i1_blockcount)
+               __field(xfs_nlink_t, i1_refcount)
+               __field(xfs_agblock_t, i2_startblock)
+               __field(xfs_extlen_t, i2_blockcount)
+               __field(xfs_nlink_t, i2_refcount)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->i1_startblock = i1->rc_startblock;
+               __entry->i1_blockcount = i1->rc_blockcount;
+               __entry->i1_refcount = i1->rc_refcount;
+               __entry->i2_startblock = i2->rc_startblock;
+               __entry->i2_blockcount = i2->rc_blockcount;
+               __entry->i2_refcount = i2->rc_refcount;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- "
+                 "agbno %u len %u refcount %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->i1_startblock,
+                 __entry->i1_blockcount,
+                 __entry->i1_refcount,
+                 __entry->i2_startblock,
+                 __entry->i2_blockcount,
+                 __entry->i2_refcount)
+ )
+ #define DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(name) \
+ DEFINE_EVENT(xfs_refcount_double_extent_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2), \
+       TP_ARGS(mp, agno, i1, i2))
+ /* double-rcext + agbno class: two records and a block; no "\n" in fmt */
+ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2,
+                xfs_agblock_t agbno),
+       TP_ARGS(mp, agno, i1, i2, agbno),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, i1_startblock)
+               __field(xfs_extlen_t, i1_blockcount)
+               __field(xfs_nlink_t, i1_refcount)
+               __field(xfs_agblock_t, i2_startblock)
+               __field(xfs_extlen_t, i2_blockcount)
+               __field(xfs_nlink_t, i2_refcount)
+               __field(xfs_agblock_t, agbno)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->i1_startblock = i1->rc_startblock;
+               __entry->i1_blockcount = i1->rc_blockcount;
+               __entry->i1_refcount = i1->rc_refcount;
+               __entry->i2_startblock = i2->rc_startblock;
+               __entry->i2_blockcount = i2->rc_blockcount;
+               __entry->i2_refcount = i2->rc_refcount;
+               __entry->agbno = agbno;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- "
+                 "agbno %u len %u refcount %u @ agbno %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->i1_startblock,
+                 __entry->i1_blockcount,
+                 __entry->i1_refcount,
+                 __entry->i2_startblock,
+                 __entry->i2_blockcount,
+                 __entry->i2_refcount,
+                 __entry->agbno)
+ )
+ #define DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(name) \
+ DEFINE_EVENT(xfs_refcount_double_extent_at_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, \
+                xfs_agblock_t agbno), \
+       TP_ARGS(mp, agno, i1, i2, agbno))
+ /* triple-rcext class: three refcount records; no "\n" in TP_printk fmt */
+ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2,
+                struct xfs_refcount_irec *i3),
+       TP_ARGS(mp, agno, i1, i2, i3),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, i1_startblock)
+               __field(xfs_extlen_t, i1_blockcount)
+               __field(xfs_nlink_t, i1_refcount)
+               __field(xfs_agblock_t, i2_startblock)
+               __field(xfs_extlen_t, i2_blockcount)
+               __field(xfs_nlink_t, i2_refcount)
+               __field(xfs_agblock_t, i3_startblock)
+               __field(xfs_extlen_t, i3_blockcount)
+               __field(xfs_nlink_t, i3_refcount)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->i1_startblock = i1->rc_startblock;
+               __entry->i1_blockcount = i1->rc_blockcount;
+               __entry->i1_refcount = i1->rc_refcount;
+               __entry->i2_startblock = i2->rc_startblock;
+               __entry->i2_blockcount = i2->rc_blockcount;
+               __entry->i2_refcount = i2->rc_refcount;
+               __entry->i3_startblock = i3->rc_startblock;
+               __entry->i3_blockcount = i3->rc_blockcount;
+               __entry->i3_refcount = i3->rc_refcount;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- "
+                 "agbno %u len %u refcount %u -- "
+                 "agbno %u len %u refcount %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->i1_startblock,
+                 __entry->i1_blockcount,
+                 __entry->i1_refcount,
+                 __entry->i2_startblock,
+                 __entry->i2_blockcount,
+                 __entry->i2_refcount,
+                 __entry->i3_startblock,
+                 __entry->i3_blockcount,
+                 __entry->i3_refcount)
+ );
+ #define DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(name) \
+ DEFINE_EVENT(xfs_refcount_triple_extent_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, \
+                struct xfs_refcount_irec *i3), \
+       TP_ARGS(mp, agno, i1, i2, i3))
+ /* refcount btree tracepoints */
+ DEFINE_BUSY_EVENT(xfs_refcountbt_alloc_block);
+ DEFINE_BUSY_EVENT(xfs_refcountbt_free_block);
+ DEFINE_AG_BTREE_LOOKUP_EVENT(xfs_refcount_lookup);
+ DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_get);
+ DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_update);
+ DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_insert);
+ DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_delete);
+ DEFINE_AG_ERROR_EVENT(xfs_refcount_insert_error);
+ DEFINE_AG_ERROR_EVENT(xfs_refcount_delete_error);
+ DEFINE_AG_ERROR_EVENT(xfs_refcount_update_error);
+ /* refcount adjustment tracepoints */
+ DEFINE_AG_EXTENT_EVENT(xfs_refcount_increase);
+ DEFINE_AG_EXTENT_EVENT(xfs_refcount_decrease);
+ DEFINE_AG_EXTENT_EVENT(xfs_refcount_cow_increase);
+ DEFINE_AG_EXTENT_EVENT(xfs_refcount_cow_decrease);
+ DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(xfs_refcount_merge_center_extents);
+ DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_modify_extent);
+ DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_recover_extent);
+ DEFINE_REFCOUNT_EXTENT_AT_EVENT(xfs_refcount_split_extent);
+ DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_left_extent);
+ DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_right_extent);
+ DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_left_extent);
+ DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_right_extent);
+ DEFINE_AG_ERROR_EVENT(xfs_refcount_adjust_error);
+ DEFINE_AG_ERROR_EVENT(xfs_refcount_adjust_cow_error);
+ DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_center_extents_error);
+ DEFINE_AG_ERROR_EVENT(xfs_refcount_modify_extent_error);
+ DEFINE_AG_ERROR_EVENT(xfs_refcount_split_extent_error);
+ DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_left_extent_error);
+ DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_right_extent_error);
+ DEFINE_AG_ERROR_EVENT(xfs_refcount_find_left_extent_error);
+ DEFINE_AG_ERROR_EVENT(xfs_refcount_find_right_extent_error);
+ /* reflink helpers */
+ DEFINE_AG_EXTENT_EVENT(xfs_refcount_find_shared);
+ DEFINE_AG_EXTENT_EVENT(xfs_refcount_find_shared_result);
+ DEFINE_AG_ERROR_EVENT(xfs_refcount_find_shared_error);
+ #define DEFINE_REFCOUNT_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT
+ DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_defer);
+ DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_deferred);
+ TRACE_EVENT(xfs_refcount_finish_one_leftover,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                int type, xfs_agblock_t agbno, xfs_extlen_t len,
+                xfs_agblock_t new_agbno, xfs_extlen_t new_len),
+       TP_ARGS(mp, agno, type, agbno, len, new_agbno, new_len),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(int, type)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+               __field(xfs_agblock_t, new_agbno)
+               __field(xfs_extlen_t, new_len)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->type = type;
+               __entry->agbno = agbno;
+               __entry->len = len;
+               __entry->new_agbno = new_agbno;
+               __entry->new_len = new_len;
+       ),
+       TP_printk("dev %d:%d type %d agno %u agbno %u len %u new_agbno %u new_len %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->type,
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len,
+                 __entry->new_agbno,
+                 __entry->new_len)
+ );
+ /* inode error class: dev, inode number (hex), error code, caller address */
+ DECLARE_EVENT_CLASS(xfs_inode_error_class,
+       TP_PROTO(struct xfs_inode *ip, int error, unsigned long caller_ip),
+       TP_ARGS(ip, error, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(int, error)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->error = error;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx error %d caller %ps",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->error,
+                 (char *)__entry->caller_ip)
+ );
+ #define DEFINE_INODE_ERROR_EVENT(name) \
+ DEFINE_EVENT(xfs_inode_error_class, name, \
+       TP_PROTO(struct xfs_inode *ip, int error, \
+                unsigned long caller_ip), \
+       TP_ARGS(ip, error, caller_ip))
+ /* reflink allocator: inode, fsblock and length, all printed as 0x hex */
+ TRACE_EVENT(xfs_bmap_remap_alloc,
+       TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t fsbno,
+                xfs_extlen_t len),
+       TP_ARGS(ip, fsbno, len),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fsblock_t, fsbno)
+               __field(xfs_extlen_t, len)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->fsbno = fsbno;
+               __entry->len = len;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx fsbno 0x%llx len 0x%x",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->fsbno,
+                 __entry->len)
+ );
+ DEFINE_INODE_ERROR_EVENT(xfs_bmap_remap_alloc_error);
+ /* reflink tracepoint classes */
+ /* two-file io tracepoint class */
+ DECLARE_EVENT_CLASS(xfs_double_io_class,
+       TP_PROTO(struct xfs_inode *src, xfs_off_t soffset, xfs_off_t len,
+                struct xfs_inode *dest, xfs_off_t doffset),
+       TP_ARGS(src, soffset, len, dest, doffset),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, src_ino)
+               __field(loff_t, src_isize)
+               __field(loff_t, src_disize)
+               __field(loff_t, src_offset)
+               __field(size_t, len)
+               __field(xfs_ino_t, dest_ino)
+               __field(loff_t, dest_isize)
+               __field(loff_t, dest_disize)
+               __field(loff_t, dest_offset)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(src)->i_sb->s_dev;
+               __entry->src_ino = src->i_ino;
+               __entry->src_isize = VFS_I(src)->i_size;
+               __entry->src_disize = src->i_d.di_size;
+               __entry->src_offset = soffset;
+               __entry->len = len;
+               __entry->dest_ino = dest->i_ino;
+               __entry->dest_isize = VFS_I(dest)->i_size;
+               __entry->dest_disize = dest->i_d.di_size;
+               __entry->dest_offset = doffset;
+       ),
+       TP_printk("dev %d:%d count %zd "
+                 "ino 0x%llx isize 0x%llx disize 0x%llx offset 0x%llx -> "
+                 "ino 0x%llx isize 0x%llx disize 0x%llx offset 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->len,
+                 __entry->src_ino,
+                 __entry->src_isize,
+                 __entry->src_disize,
+                 __entry->src_offset,
+                 __entry->dest_ino,
+                 __entry->dest_isize,
+                 __entry->dest_disize,
+                 __entry->dest_offset)
+ )
+ #define DEFINE_DOUBLE_IO_EVENT(name)  \
+ DEFINE_EVENT(xfs_double_io_class, name,       \
+       TP_PROTO(struct xfs_inode *src, xfs_off_t soffset, xfs_off_t len, \
+                struct xfs_inode *dest, xfs_off_t doffset), \
+       TP_ARGS(src, soffset, len, dest, doffset))
+ /* two-file vfs io class: struct inode args, sizes read via i_size_read() */
+ DECLARE_EVENT_CLASS(xfs_double_vfs_io_class,
+       TP_PROTO(struct inode *src, u64 soffset, u64 len,
+                struct inode *dest, u64 doffset),
+       TP_ARGS(src, soffset, len, dest, doffset),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(unsigned long, src_ino)
+               __field(loff_t, src_isize)
+               __field(loff_t, src_offset)
+               __field(size_t, len)
+               __field(unsigned long, dest_ino)
+               __field(loff_t, dest_isize)
+               __field(loff_t, dest_offset)
+       ),
+       TP_fast_assign(
+               __entry->dev = src->i_sb->s_dev;
+               __entry->src_ino = src->i_ino;
+               __entry->src_isize = i_size_read(src);
+               __entry->src_offset = soffset;
+               __entry->len = len;
+               __entry->dest_ino = dest->i_ino;
+               __entry->dest_isize = i_size_read(dest);
+               __entry->dest_offset = doffset;
+       ),
+       TP_printk("dev %d:%d count %zd "
+                 "ino 0x%lx isize 0x%llx offset 0x%llx -> "
+                 "ino 0x%lx isize 0x%llx offset 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->len,
+                 __entry->src_ino,
+                 __entry->src_isize,
+                 __entry->src_offset,
+                 __entry->dest_ino,
+                 __entry->dest_isize,
+                 __entry->dest_offset)
+ )
+ #define DEFINE_DOUBLE_VFS_IO_EVENT(name)      \
+ DEFINE_EVENT(xfs_double_vfs_io_class, name,   \
+       TP_PROTO(struct inode *src, u64 soffset, u64 len, \
+                struct inode *dest, u64 doffset), \
+       TP_ARGS(src, soffset, len, dest, doffset))
+ /* CoW write class: inode, file block, pblk/new_pblk and extent length */
+ DECLARE_EVENT_CLASS(xfs_copy_on_write_class,
+       TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk, xfs_fsblock_t pblk,
+                xfs_extlen_t len, xfs_fsblock_t new_pblk),
+       TP_ARGS(ip, lblk, pblk, len, new_pblk),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fileoff_t, lblk)
+               __field(xfs_fsblock_t, pblk)
+               __field(xfs_extlen_t, len)
+               __field(xfs_fsblock_t, new_pblk)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->lblk = lblk;
+               __entry->pblk = pblk;
+               __entry->len = len;
+               __entry->new_pblk = new_pblk;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx pblk 0x%llx "
+                 "len 0x%x new_pblk %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->lblk,
+                 __entry->pblk,
+                 __entry->len,
+                 __entry->new_pblk)
+ )
+ #define DEFINE_COW_EVENT(name)        \
+ DEFINE_EVENT(xfs_copy_on_write_class, name,   \
+       TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk, xfs_fsblock_t pblk, \
+                xfs_extlen_t len, xfs_fsblock_t new_pblk), \
+       TP_ARGS(ip, lblk, pblk, len, new_pblk))
+ /* inode/irec class: logs br_startoff, br_blockcount and br_startblock */
+ DECLARE_EVENT_CLASS(xfs_inode_irec_class,
+       TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec),
+       TP_ARGS(ip, irec),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fileoff_t, lblk)
+               __field(xfs_extlen_t, len)
+               __field(xfs_fsblock_t, pblk)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->lblk = irec->br_startoff;
+               __entry->len = irec->br_blockcount;
+               __entry->pblk = irec->br_startblock;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->lblk,
+                 __entry->len,
+                 __entry->pblk)
+ );
+ #define DEFINE_INODE_IREC_EVENT(name) \
+ DEFINE_EVENT(xfs_inode_irec_class, name, \
+       TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec), \
+       TP_ARGS(ip, irec))
+ /* refcount/reflink tracepoint definitions */
+ /* reflink tracepoints */
+ DEFINE_INODE_EVENT(xfs_reflink_set_inode_flag);
+ DEFINE_INODE_EVENT(xfs_reflink_unset_inode_flag);
+ DEFINE_ITRUNC_EVENT(xfs_reflink_update_inode_size);
+ DEFINE_IOMAP_EVENT(xfs_reflink_remap_imap);
+ TRACE_EVENT(xfs_reflink_remap_blocks_loop, /* logs a source and a destination inode/offset pair plus a length */
+       TP_PROTO(struct xfs_inode *src, xfs_fileoff_t soffset,
+                xfs_filblks_t len, struct xfs_inode *dest,
+                xfs_fileoff_t doffset),
+       TP_ARGS(src, soffset, len, dest, doffset),
+       TP_STRUCT__entry(
+               __field(dev_t, dev) /* taken from src only; dest's device is not recorded */
+               __field(xfs_ino_t, src_ino)
+               __field(xfs_fileoff_t, src_lblk) /* soffset */
+               __field(xfs_filblks_t, len)
+               __field(xfs_ino_t, dest_ino)
+               __field(xfs_fileoff_t, dest_lblk) /* doffset */
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(src)->i_sb->s_dev;
+               __entry->src_ino = src->i_ino;
+               __entry->src_lblk = soffset;
+               __entry->len = len;
+               __entry->dest_ino = dest->i_ino;
+               __entry->dest_lblk = doffset;
+       ),
+       TP_printk("dev %d:%d len 0x%llx "
+                 "ino 0x%llx offset 0x%llx blocks -> "
+                 "ino 0x%llx offset 0x%llx blocks",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->len, /* printed first, ahead of both inode/offset pairs */
+                 __entry->src_ino,
+                 __entry->src_lblk,
+                 __entry->dest_ino,
+                 __entry->dest_lblk)
+ );
+ TRACE_EVENT(xfs_reflink_punch_range, /* logs an inode with the lblk and len passed by the caller */
+       TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk,
+                xfs_extlen_t len),
+       TP_ARGS(ip, lblk, len),
+       TP_STRUCT__entry(
+               __field(dev_t, dev) /* device of ip's superblock */
+               __field(xfs_ino_t, ino) /* ip->i_ino */
+               __field(xfs_fileoff_t, lblk)
+               __field(xfs_extlen_t, len)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->lblk = lblk;
+               __entry->len = len;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->lblk,
+                 __entry->len)
+ );
+ TRACE_EVENT(xfs_reflink_remap, /* logs an inode with the lblk, len and new_pblk passed by the caller */
+       TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk,
+                xfs_extlen_t len, xfs_fsblock_t new_pblk),
+       TP_ARGS(ip, lblk, len, new_pblk),
+       TP_STRUCT__entry(
+               __field(dev_t, dev) /* device of ip's superblock */
+               __field(xfs_ino_t, ino) /* ip->i_ino */
+               __field(xfs_fileoff_t, lblk)
+               __field(xfs_extlen_t, len)
+               __field(xfs_fsblock_t, new_pblk)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->lblk = lblk;
+               __entry->len = len;
+               __entry->new_pblk = new_pblk;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x new_pblk %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->lblk,
+                 __entry->len,
+                 __entry->new_pblk) /* new_pblk is printed in decimal; ino, lblk and len in hex */
+ );
+ DEFINE_DOUBLE_IO_EVENT(xfs_reflink_remap_range);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_range_error);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_set_inode_flag_error);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_update_inode_size_error);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_reflink_main_loop_error);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_read_iomap_error);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_blocks_error);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_extent_error);
+ /* dedupe tracepoints */
+ DEFINE_DOUBLE_IO_EVENT(xfs_reflink_compare_extents);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_compare_extents_error);
+ /* ioctl tracepoints (xfs_ioctl_clone below takes struct inode, not struct xfs_inode) */
+ DEFINE_DOUBLE_VFS_IO_EVENT(xfs_ioctl_reflink);
+ DEFINE_DOUBLE_VFS_IO_EVENT(xfs_ioctl_clone_range);
+ DEFINE_DOUBLE_VFS_IO_EVENT(xfs_ioctl_file_extent_same);
+ TRACE_EVENT(xfs_ioctl_clone, /* logs inode number and i_size of both src and dest */
+       TP_PROTO(struct inode *src, struct inode *dest),
+       TP_ARGS(src, dest),
+       TP_STRUCT__entry(
+               __field(dev_t, dev) /* taken from src only; dest's device is not recorded */
+               __field(unsigned long, src_ino)
+               __field(loff_t, src_isize) /* i_size_read(src) */
+               __field(unsigned long, dest_ino)
+               __field(loff_t, dest_isize) /* i_size_read(dest) */
+       ),
+       TP_fast_assign(
+               __entry->dev = src->i_sb->s_dev;
+               __entry->src_ino = src->i_ino;
+               __entry->src_isize = i_size_read(src);
+               __entry->dest_ino = dest->i_ino;
+               __entry->dest_isize = i_size_read(dest);
+       ),
+       TP_printk("dev %d:%d "
+                 "ino 0x%lx isize 0x%llx -> "
+                 "ino 0x%lx isize 0x%llx", /* no "\n": the trace output terminates each event itself */
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->src_ino,
+                 __entry->src_isize,
+                 __entry->dest_ino,
+                 __entry->dest_isize)
+ );
+ /* unshare tracepoints */
+ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_unshare);
+ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cow_eof_block);
+ DEFINE_PAGE_EVENT(xfs_reflink_unshare_page);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_cow_eof_block_error);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_dirty_page_error);
+ /* copy on write tracepoints (the INODE_IREC and COW events use the classes declared above) */
+ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared);
+ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
+ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
+ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
+ DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range);
+ DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
+ DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
+ DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping);
+ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec);
+ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
+ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
+ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
+ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_reserve_cow_range_error);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
+ DEFINE_COW_EVENT(xfs_reflink_fork_buf);
+ DEFINE_COW_EVENT(xfs_reflink_finish_fork_buf);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_fork_buf_error);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_finish_fork_buf_error);
+ DEFINE_INODE_EVENT(xfs_reflink_cancel_pending_cow);
+ DEFINE_INODE_IREC_EVENT(xfs_reflink_cancel_cow);
+ DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_pending_cow_error);
+ /* rmap swapext tracepoints */
+ DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap);
+ DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece);
+ DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error);
  #endif /* _TRACE_XFS_H */
  
  #undef TRACE_INCLUDE_PATH
diff --combined include/uapi/linux/fs.h
@@@ -132,7 -132,6 +132,7 @@@ struct inodes_stat_t 
  #define MS_LAZYTIME   (1<<25) /* Update the on-disk [acm]times lazily */
  
  /* These sb flags are internal to the kernel */
 +#define MS_NOREMOTELOCK       (1<<27)
  #define MS_NOSEC      (1<<28)
  #define MS_BORN               (1<<29)
  #define MS_ACTIVE     (1<<30)
@@@ -158,7 -157,8 +158,8 @@@ struct fsxattr 
        __u32           fsx_extsize;    /* extsize field value (get/set)*/
        __u32           fsx_nextents;   /* nextents field value (get)   */
        __u32           fsx_projid;     /* project identifier (get/set) */
-       unsigned char   fsx_pad[12];
+       __u32           fsx_cowextsize; /* CoW extsize field value (get/set)*/
+       unsigned char   fsx_pad[8];
  };
  
  /*
  #define FS_XFLAG_NODEFRAG     0x00002000      /* do not defragment */
  #define FS_XFLAG_FILESTREAM   0x00004000      /* use filestream allocator */
  #define FS_XFLAG_DAX          0x00008000      /* use DAX for IO */
+ #define FS_XFLAG_COWEXTSIZE   0x00010000      /* CoW extent size allocator hint */
  #define FS_XFLAG_HASATTR      0x80000000      /* no DIFLAG for this   */
  
  /* the read-only stuff doesn't really belong here, but any other place is