IB/srp: Fix kernel-doc warnings
[cascardo/linux.git] / fs / f2fs / segment.c
index fa284d3..085f548 100644 (file)
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/prefetch.h>
+#include <linux/kthread.h>
 #include <linux/vmalloc.h>
+#include <linux/swap.h>
 
 #include "f2fs.h"
 #include "segment.h"
 #include "node.h"
 #include <trace/events/f2fs.h>
 
+#define __reverse_ffz(x) __reverse_ffs(~(x))
+
+static struct kmem_cache *discard_entry_slab;
+static struct kmem_cache *flush_cmd_slab;
+
+/*
+ * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
+ * MSB and LSB are reversed in a byte by f2fs_set_bit.
+ */
+static inline unsigned long __reverse_ffs(unsigned long word)
+{
+       int num = 0;
+
+#if BITS_PER_LONG == 64
+       if ((word & 0xffffffff) == 0) {
+               num += 32;
+               word >>= 32;
+       }
+#endif
+       if ((word & 0xffff) == 0) {
+               num += 16;
+               word >>= 16;
+       }
+       if ((word & 0xff) == 0) {
+               num += 8;
+               word >>= 8;
+       }
+       if ((word & 0xf0) == 0)
+               num += 4;
+       else
+               word >>= 4;
+       if ((word & 0xc) == 0)
+               num += 2;
+       else
+               word >>= 2;
+       if ((word & 0x2) == 0)
+               num += 1;
+       return num;
+}
+
+/*
+ * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c becasue
+ * f2fs_set_bit makes MSB and LSB reversed in a byte.
+ * Example:
+ *                             LSB <--> MSB
+ *   f2fs_set_bit(0, bitmap) => 0000 0001
+ *   f2fs_set_bit(7, bitmap) => 1000 0000
+ */
+static unsigned long __find_rev_next_bit(const unsigned long *addr,
+                       unsigned long size, unsigned long offset)
+{
+       const unsigned long *p = addr + BIT_WORD(offset);
+       unsigned long result = offset & ~(BITS_PER_LONG - 1);
+       unsigned long tmp;
+       unsigned long mask, submask;
+       unsigned long quot, rest;
+
+       if (offset >= size)
+               return size;
+
+       size -= result;
+       offset %= BITS_PER_LONG;
+       if (!offset)
+               goto aligned;
+
+       tmp = *(p++);
+       quot = (offset >> 3) << 3;
+       rest = offset & 0x7;
+       mask = ~0UL << quot;
+       submask = (unsigned char)(0xff << rest) >> rest;
+       submask <<= quot;
+       mask &= submask;
+       tmp &= mask;
+       if (size < BITS_PER_LONG)
+               goto found_first;
+       if (tmp)
+               goto found_middle;
+
+       size -= BITS_PER_LONG;
+       result += BITS_PER_LONG;
+aligned:
+       while (size & ~(BITS_PER_LONG-1)) {
+               tmp = *(p++);
+               if (tmp)
+                       goto found_middle;
+               result += BITS_PER_LONG;
+               size -= BITS_PER_LONG;
+       }
+       if (!size)
+               return result;
+       tmp = *p;
+found_first:
+       tmp &= (~0UL >> (BITS_PER_LONG - size));
+       if (tmp == 0UL)         /* Are any bits set? */
+               return result + size;   /* Nope. */
+found_middle:
+       return result + __reverse_ffs(tmp);
+}
+
+static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
+                       unsigned long size, unsigned long offset)
+{
+       const unsigned long *p = addr + BIT_WORD(offset);
+       unsigned long result = offset & ~(BITS_PER_LONG - 1);
+       unsigned long tmp;
+       unsigned long mask, submask;
+       unsigned long quot, rest;
+
+       if (offset >= size)
+               return size;
+
+       size -= result;
+       offset %= BITS_PER_LONG;
+       if (!offset)
+               goto aligned;
+
+       tmp = *(p++);
+       quot = (offset >> 3) << 3;
+       rest = offset & 0x7;
+       mask = ~(~0UL << quot);
+       submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
+       submask <<= quot;
+       mask += submask;
+       tmp |= mask;
+       if (size < BITS_PER_LONG)
+               goto found_first;
+       if (~tmp)
+               goto found_middle;
+
+       size -= BITS_PER_LONG;
+       result += BITS_PER_LONG;
+aligned:
+       while (size & ~(BITS_PER_LONG - 1)) {
+               tmp = *(p++);
+               if (~tmp)
+                       goto found_middle;
+               result += BITS_PER_LONG;
+               size -= BITS_PER_LONG;
+       }
+       if (!size)
+               return result;
+       tmp = *p;
+
+found_first:
+       tmp |= ~0UL << size;
+       if (tmp == ~0UL)        /* Are any bits zero? */
+               return result + size;   /* Nope. */
+found_middle:
+       return result + __reverse_ffz(tmp);
+}
+
 /*
  * This function balances dirty node and dentry pages.
  * In addition, it controls garbage collection.
@@ -44,6 +197,73 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
                f2fs_sync_fs(sbi->sb, true);
 }
 
+static int issue_flush_thread(void *data)
+{
+       struct f2fs_sb_info *sbi = data;
+       struct f2fs_sm_info *sm_i = SM_I(sbi);
+       wait_queue_head_t *q = &sm_i->flush_wait_queue;
+repeat:
+       if (kthread_should_stop())
+               return 0;
+
+       spin_lock(&sm_i->issue_lock);
+       if (sm_i->issue_list) {
+               sm_i->dispatch_list = sm_i->issue_list;
+               sm_i->issue_list = sm_i->issue_tail = NULL;
+       }
+       spin_unlock(&sm_i->issue_lock);
+
+       if (sm_i->dispatch_list) {
+               struct bio *bio = bio_alloc(GFP_NOIO, 0);
+               struct flush_cmd *cmd, *next;
+               int ret;
+
+               bio->bi_bdev = sbi->sb->s_bdev;
+               ret = submit_bio_wait(WRITE_FLUSH, bio);
+
+               for (cmd = sm_i->dispatch_list; cmd; cmd = next) {
+                       cmd->ret = ret;
+                       next = cmd->next;
+                       complete(&cmd->wait);
+               }
+               sm_i->dispatch_list = NULL;
+       }
+
+       wait_event_interruptible(*q, kthread_should_stop() || sm_i->issue_list);
+       goto repeat;
+}
+
+int f2fs_issue_flush(struct f2fs_sb_info *sbi)
+{
+       struct f2fs_sm_info *sm_i = SM_I(sbi);
+       struct flush_cmd *cmd;
+       int ret;
+
+       if (!test_opt(sbi, FLUSH_MERGE))
+               return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
+
+       cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC);
+       cmd->next = NULL;
+       cmd->ret = 0;
+       init_completion(&cmd->wait);
+
+       spin_lock(&sm_i->issue_lock);
+       if (sm_i->issue_list)
+               sm_i->issue_tail->next = cmd;
+       else
+               sm_i->issue_list = cmd;
+       sm_i->issue_tail = cmd;
+       spin_unlock(&sm_i->issue_lock);
+
+       if (!sm_i->dispatch_list)
+               wake_up(&sm_i->flush_wait_queue);
+
+       wait_for_completion(&cmd->wait);
+       ret = cmd->ret;
+       kmem_cache_free(flush_cmd_slab, cmd);
+       return ret;
+}
+
 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
                enum dirty_type dirty_type)
 {
@@ -116,6 +336,56 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
        mutex_unlock(&dirty_i->seglist_lock);
 }
 
+static void f2fs_issue_discard(struct f2fs_sb_info *sbi,
+                               block_t blkstart, block_t blklen)
+{
+       sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart);
+       sector_t len = SECTOR_FROM_BLOCK(sbi, blklen);
+       blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
+       trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
+}
+
+static void add_discard_addrs(struct f2fs_sb_info *sbi,
+                       unsigned int segno, struct seg_entry *se)
+{
+       struct list_head *head = &SM_I(sbi)->discard_list;
+       struct discard_entry *new;
+       int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
+       int max_blocks = sbi->blocks_per_seg;
+       unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
+       unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
+       unsigned long dmap[entries];
+       unsigned int start = 0, end = -1;
+       int i;
+
+       if (!test_opt(sbi, DISCARD))
+               return;
+
+       /* zero block will be discarded through the prefree list */
+       if (!se->valid_blocks || se->valid_blocks == max_blocks)
+               return;
+
+       /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
+       for (i = 0; i < entries; i++)
+               dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
+
+       while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
+               start = __find_rev_next_bit(dmap, max_blocks, end + 1);
+               if (start >= max_blocks)
+                       break;
+
+               end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
+
+               new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
+               INIT_LIST_HEAD(&new->list);
+               new->blkaddr = START_BLOCK(sbi, segno) + start;
+               new->len = end - start;
+
+               list_add_tail(&new->list, head);
+               SM_I(sbi)->nr_discards += end - start;
+       }
+}
+
 /*
  * Should call clear_prefree_segments after checkpoint is done.
  */
@@ -138,6 +408,8 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
 
 void clear_prefree_segments(struct f2fs_sb_info *sbi)
 {
+       struct list_head *head = &(SM_I(sbi)->discard_list);
+       struct discard_entry *entry, *this;
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
        unsigned int total_segs = TOTAL_SEGS(sbi);
@@ -160,14 +432,18 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi)
                if (!test_opt(sbi, DISCARD))
                        continue;
 
-               blkdev_issue_discard(sbi->sb->s_bdev,
-                               START_BLOCK(sbi, start) <<
-                               sbi->log_sectors_per_block,
-                               (1 << (sbi->log_sectors_per_block +
-                               sbi->log_blocks_per_seg)) * (end - start),
-                               GFP_NOFS, 0);
+               f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
+                               (end - start) << sbi->log_blocks_per_seg);
        }
        mutex_unlock(&dirty_i->seglist_lock);
+
+       /* send small discards */
+       list_for_each_entry_safe(entry, this, head, list) {
+               f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
+               list_del(&entry->list);
+               SM_I(sbi)->nr_discards -= entry->len;
+               kmem_cache_free(discard_entry_slab, entry);
+       }
 }
 
 static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
@@ -196,7 +472,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 
        se = get_seg_entry(sbi, segno);
        new_vblocks = se->valid_blocks + del;
-       offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1);
+       offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
 
        f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) ||
                                (new_vblocks > sbi->blocks_per_seg)));
@@ -225,12 +501,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
                get_sec_entry(sbi, segno)->valid_blocks += del;
 }
 
-static void refresh_sit_entry(struct f2fs_sb_info *sbi,
-                       block_t old_blkaddr, block_t new_blkaddr)
+void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
 {
-       update_sit_entry(sbi, new_blkaddr, 1);
-       if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
-               update_sit_entry(sbi, old_blkaddr, -1);
+       update_sit_entry(sbi, new, 1);
+       if (GET_SEGNO(sbi, old) != NULL_SEGNO)
+               update_sit_entry(sbi, old, -1);
+
+       locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
+       locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
 }
 
 void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
@@ -459,13 +737,18 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi,
                        struct curseg_info *seg, block_t start)
 {
        struct seg_entry *se = get_seg_entry(sbi, seg->segno);
-       block_t ofs;
-       for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) {
-               if (!f2fs_test_bit(ofs, se->ckpt_valid_map)
-                       && !f2fs_test_bit(ofs, se->cur_valid_map))
-                       break;
-       }
-       seg->next_blkoff = ofs;
+       int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
+       unsigned long target_map[entries];
+       unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
+       unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
+       int i, pos;
+
+       for (i = 0; i < entries; i++)
+               target_map[i] = ckpt_map[i] | cur_map[i];
+
+       pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
+
+       seg->next_blkoff = pos;
 }
 
 /*
@@ -573,148 +856,6 @@ static const struct segment_allocation default_salloc_ops = {
        .allocate_segment = allocate_segment_by_default,
 };
 
-static void f2fs_end_io_write(struct bio *bio, int err)
-{
-       const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-       struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-       struct bio_private *p = bio->bi_private;
-
-       do {
-               struct page *page = bvec->bv_page;
-
-               if (--bvec >= bio->bi_io_vec)
-                       prefetchw(&bvec->bv_page->flags);
-               if (!uptodate) {
-                       SetPageError(page);
-                       if (page->mapping)
-                               set_bit(AS_EIO, &page->mapping->flags);
-                       set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
-                       p->sbi->sb->s_flags |= MS_RDONLY;
-               }
-               end_page_writeback(page);
-               dec_page_count(p->sbi, F2FS_WRITEBACK);
-       } while (bvec >= bio->bi_io_vec);
-
-       if (p->is_sync)
-               complete(p->wait);
-
-       if (!get_pages(p->sbi, F2FS_WRITEBACK) &&
-                       !list_empty(&p->sbi->cp_wait.task_list))
-               wake_up(&p->sbi->cp_wait);
-
-       kfree(p);
-       bio_put(bio);
-}
-
-struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages)
-{
-       struct bio *bio;
-
-       /* No failure on bio allocation */
-       bio = bio_alloc(GFP_NOIO, npages);
-       bio->bi_bdev = bdev;
-       bio->bi_private = NULL;
-
-       return bio;
-}
-
-static void do_submit_bio(struct f2fs_sb_info *sbi,
-                               enum page_type type, bool sync)
-{
-       int rw = sync ? WRITE_SYNC : WRITE;
-       enum page_type btype = type > META ? META : type;
-
-       if (type >= META_FLUSH)
-               rw = WRITE_FLUSH_FUA;
-
-       if (btype == META)
-               rw |= REQ_META;
-
-       if (sbi->bio[btype]) {
-               struct bio_private *p = sbi->bio[btype]->bi_private;
-               p->sbi = sbi;
-               sbi->bio[btype]->bi_end_io = f2fs_end_io_write;
-
-               trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]);
-
-               if (type == META_FLUSH) {
-                       DECLARE_COMPLETION_ONSTACK(wait);
-                       p->is_sync = true;
-                       p->wait = &wait;
-                       submit_bio(rw, sbi->bio[btype]);
-                       wait_for_completion(&wait);
-               } else {
-                       p->is_sync = false;
-                       submit_bio(rw, sbi->bio[btype]);
-               }
-               sbi->bio[btype] = NULL;
-       }
-}
-
-void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync)
-{
-       down_write(&sbi->bio_sem);
-       do_submit_bio(sbi, type, sync);
-       up_write(&sbi->bio_sem);
-}
-
-static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
-                               block_t blk_addr, enum page_type type)
-{
-       struct block_device *bdev = sbi->sb->s_bdev;
-       int bio_blocks;
-
-       verify_block_addr(sbi, blk_addr);
-
-       down_write(&sbi->bio_sem);
-
-       inc_page_count(sbi, F2FS_WRITEBACK);
-
-       if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1)
-               do_submit_bio(sbi, type, false);
-alloc_new:
-       if (sbi->bio[type] == NULL) {
-               struct bio_private *priv;
-retry:
-               priv = kmalloc(sizeof(struct bio_private), GFP_NOFS);
-               if (!priv) {
-                       cond_resched();
-                       goto retry;
-               }
-
-               bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
-               sbi->bio[type] = f2fs_bio_alloc(bdev, bio_blocks);
-               sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
-               sbi->bio[type]->bi_private = priv;
-               /*
-                * The end_io will be assigned at the sumbission phase.
-                * Until then, let bio_add_page() merge consecutive IOs as much
-                * as possible.
-                */
-       }
-
-       if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) <
-                                                       PAGE_CACHE_SIZE) {
-               do_submit_bio(sbi, type, false);
-               goto alloc_new;
-       }
-
-       sbi->last_block_in_bio[type] = blk_addr;
-
-       up_write(&sbi->bio_sem);
-       trace_f2fs_submit_write_page(page, blk_addr, type);
-}
-
-void f2fs_wait_on_page_writeback(struct page *page,
-                               enum page_type type, bool sync)
-{
-       struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
-       if (PageWriteback(page)) {
-               f2fs_submit_bio(sbi, type, sync);
-               wait_on_page_writeback(page);
-       }
-}
-
 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
 {
        struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -782,16 +923,14 @@ static int __get_segment_type(struct page *page, enum page_type p_type)
        return __get_segment_type_6(page, p_type);
 }
 
-static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
-                       block_t old_blkaddr, block_t *new_blkaddr,
-                       struct f2fs_summary *sum, enum page_type p_type)
+void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+               block_t old_blkaddr, block_t *new_blkaddr,
+               struct f2fs_summary *sum, int type)
 {
        struct sit_info *sit_i = SIT_I(sbi);
        struct curseg_info *curseg;
        unsigned int old_cursegno;
-       int type;
 
-       type = __get_segment_type(page, p_type);
        curseg = CURSEG_I(sbi, type);
 
        mutex_lock(&curseg->curseg_mutex);
@@ -811,62 +950,75 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
 
        stat_inc_block_count(sbi, curseg);
 
+       if (!__has_curseg_space(sbi, type))
+               sit_i->s_ops->allocate_segment(sbi, type, false);
        /*
         * SIT information should be updated before segment allocation,
         * since SSR needs latest valid block information.
         */
        refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
-
-       if (!__has_curseg_space(sbi, type))
-               sit_i->s_ops->allocate_segment(sbi, type, false);
-
        locate_dirty_segment(sbi, old_cursegno);
-       locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
+
        mutex_unlock(&sit_i->sentry_lock);
 
-       if (p_type == NODE)
+       if (page && IS_NODESEG(type))
                fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
 
-       /* writeout dirty page into bdev */
-       submit_write_page(sbi, page, *new_blkaddr, p_type);
-
        mutex_unlock(&curseg->curseg_mutex);
 }
 
+static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
+                       block_t old_blkaddr, block_t *new_blkaddr,
+                       struct f2fs_summary *sum, struct f2fs_io_info *fio)
+{
+       int type = __get_segment_type(page, fio->type);
+
+       allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
+
+       /* writeout dirty page into bdev */
+       f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
+}
+
 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
 {
+       struct f2fs_io_info fio = {
+               .type = META,
+               .rw = WRITE_SYNC | REQ_META | REQ_PRIO
+       };
+
        set_page_writeback(page);
-       submit_write_page(sbi, page, page->index, META);
+       f2fs_submit_page_mbio(sbi, page, page->index, &fio);
 }
 
 void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
+               struct f2fs_io_info *fio,
                unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
 {
        struct f2fs_summary sum;
        set_summary(&sum, nid, 0, 0);
-       do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE);
+       do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio);
 }
 
-void write_data_page(struct inode *inode, struct page *page,
-               struct dnode_of_data *dn, block_t old_blkaddr,
-               block_t *new_blkaddr)
+void write_data_page(struct page *page, struct dnode_of_data *dn,
+               block_t *new_blkaddr, struct f2fs_io_info *fio)
 {
-       struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+       struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
        struct f2fs_summary sum;
        struct node_info ni;
 
-       f2fs_bug_on(old_blkaddr == NULL_ADDR);
+       f2fs_bug_on(dn->data_blkaddr == NULL_ADDR);
        get_node_info(sbi, dn->nid, &ni);
        set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
 
-       do_write_page(sbi, page, old_blkaddr,
-                       new_blkaddr, &sum, DATA);
+       do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio);
 }
 
-void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page,
-                                       block_t old_blk_addr)
+void rewrite_data_page(struct page *page, block_t old_blkaddr,
+                                       struct f2fs_io_info *fio)
 {
-       submit_write_page(sbi, page, old_blk_addr, DATA);
+       struct inode *inode = page->mapping->host;
+       struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+       f2fs_submit_page_mbio(sbi, page, old_blkaddr, fio);
 }
 
 void recover_data_page(struct f2fs_sb_info *sbi,
@@ -902,14 +1054,11 @@ void recover_data_page(struct f2fs_sb_info *sbi,
                change_curseg(sbi, type, true);
        }
 
-       curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
-                                       (sbi->blocks_per_seg - 1);
+       curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
        __add_sum_entry(sbi, type, sum);
 
        refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
-
        locate_dirty_segment(sbi, old_cursegno);
-       locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
 
        mutex_unlock(&sit_i->sentry_lock);
        mutex_unlock(&curseg->curseg_mutex);
@@ -925,6 +1074,10 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
        unsigned int segno, old_cursegno;
        block_t next_blkaddr = next_blkaddr_of_node(page);
        unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);
+       struct f2fs_io_info fio = {
+               .type = NODE,
+               .rw = WRITE_SYNC,
+       };
 
        curseg = CURSEG_I(sbi, type);
 
@@ -939,8 +1092,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
                curseg->next_segno = segno;
                change_curseg(sbi, type, true);
        }
-       curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
-                                       (sbi->blocks_per_seg - 1);
+       curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
        __add_sum_entry(sbi, type, sum);
 
        /* change the current log to the next block addr in advance */
@@ -948,22 +1100,54 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
                curseg->next_segno = next_segno;
                change_curseg(sbi, type, true);
        }
-       curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) &
-                                       (sbi->blocks_per_seg - 1);
+       curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr);
 
        /* rewrite node page */
        set_page_writeback(page);
-       submit_write_page(sbi, page, new_blkaddr, NODE);
-       f2fs_submit_bio(sbi, NODE, true);
+       f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
+       f2fs_submit_merged_bio(sbi, NODE, WRITE);
        refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
-
        locate_dirty_segment(sbi, old_cursegno);
-       locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
 
        mutex_unlock(&sit_i->sentry_lock);
        mutex_unlock(&curseg->curseg_mutex);
 }
 
+static inline bool is_merged_page(struct f2fs_sb_info *sbi,
+                                       struct page *page, enum page_type type)
+{
+       enum page_type btype = PAGE_TYPE_OF_BIO(type);
+       struct f2fs_bio_info *io = &sbi->write_io[btype];
+       struct bio_vec *bvec;
+       int i;
+
+       down_read(&io->io_rwsem);
+       if (!io->bio)
+               goto out;
+
+       bio_for_each_segment_all(bvec, io->bio, i) {
+               if (page == bvec->bv_page) {
+                       up_read(&io->io_rwsem);
+                       return true;
+               }
+       }
+
+out:
+       up_read(&io->io_rwsem);
+       return false;
+}
+
+void f2fs_wait_on_page_writeback(struct page *page,
+                               enum page_type type)
+{
+       struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
+       if (PageWriteback(page)) {
+               if (is_merged_page(sbi, page, type))
+                       f2fs_submit_merged_bio(sbi, type, WRITE);
+               wait_on_page_writeback(page);
+       }
+}
+
 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
 {
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -1068,9 +1252,12 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
                                ns->ofs_in_node = 0;
                        }
                } else {
-                       if (restore_node_summary(sbi, segno, sum)) {
+                       int err;
+
+                       err = restore_node_summary(sbi, segno, sum);
+                       if (err) {
                                f2fs_put_page(new, 1);
-                               return -EINVAL;
+                               return err;
                        }
                }
        }
@@ -1091,6 +1278,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
 {
        int type = CURSEG_HOT_DATA;
+       int err;
 
        if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
                /* restore for compacted data summary */
@@ -1099,9 +1287,12 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
                type = CURSEG_HOT_NODE;
        }
 
-       for (; type <= CURSEG_COLD_NODE; type++)
-               if (read_normal_summaries(sbi, type))
-                       return -EINVAL;
+       for (; type <= CURSEG_COLD_NODE; type++) {
+               err = read_normal_summaries(sbi, type);
+               if (err)
+                       return err;
+       }
+
        return 0;
 }
 
@@ -1314,6 +1505,10 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
 
                sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
 
+               /* add discard candidates */
+               if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards)
+                       add_discard_addrs(sbi, segno, se);
+
                if (flushed)
                        goto to_sit_page;
 
@@ -1485,36 +1680,48 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
        struct sit_info *sit_i = SIT_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
        struct f2fs_summary_block *sum = curseg->sum_blk;
-       unsigned int start;
-
-       for (start = 0; start < TOTAL_SEGS(sbi); start++) {
-               struct seg_entry *se = &sit_i->sentries[start];
-               struct f2fs_sit_block *sit_blk;
-               struct f2fs_sit_entry sit;
-               struct page *page;
-               int i;
+       int sit_blk_cnt = SIT_BLK_CNT(sbi);
+       unsigned int i, start, end;
+       unsigned int readed, start_blk = 0;
+       int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
 
-               mutex_lock(&curseg->curseg_mutex);
-               for (i = 0; i < sits_in_cursum(sum); i++) {
-                       if (le32_to_cpu(segno_in_journal(sum, i)) == start) {
-                               sit = sit_in_journal(sum, i);
-                               mutex_unlock(&curseg->curseg_mutex);
-                               goto got_it;
+       do {
+               readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
+
+               start = start_blk * sit_i->sents_per_block;
+               end = (start_blk + readed) * sit_i->sents_per_block;
+
+               for (; start < end && start < TOTAL_SEGS(sbi); start++) {
+                       struct seg_entry *se = &sit_i->sentries[start];
+                       struct f2fs_sit_block *sit_blk;
+                       struct f2fs_sit_entry sit;
+                       struct page *page;
+
+                       mutex_lock(&curseg->curseg_mutex);
+                       for (i = 0; i < sits_in_cursum(sum); i++) {
+                               if (le32_to_cpu(segno_in_journal(sum, i))
+                                                               == start) {
+                                       sit = sit_in_journal(sum, i);
+                                       mutex_unlock(&curseg->curseg_mutex);
+                                       goto got_it;
+                               }
                        }
-               }
-               mutex_unlock(&curseg->curseg_mutex);
-               page = get_current_sit_page(sbi, start);
-               sit_blk = (struct f2fs_sit_block *)page_address(page);
-               sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
-               f2fs_put_page(page, 1);
+                       mutex_unlock(&curseg->curseg_mutex);
+
+                       page = get_current_sit_page(sbi, start);
+                       sit_blk = (struct f2fs_sit_block *)page_address(page);
+                       sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
+                       f2fs_put_page(page, 1);
 got_it:
-               check_block_count(sbi, start, &sit);
-               seg_info_from_raw_sit(se, &sit);
-               if (sbi->segs_per_sec > 1) {
-                       struct sec_entry *e = get_sec_entry(sbi, start);
-                       e->valid_blocks += se->valid_blocks;
+                       check_block_count(sbi, start, &sit);
+                       seg_info_from_raw_sit(se, &sit);
+                       if (sbi->segs_per_sec > 1) {
+                               struct sec_entry *e = get_sec_entry(sbi, start);
+                               e->valid_blocks += se->valid_blocks;
+                       }
                }
-       }
+               start_blk += readed;
+       } while (start_blk < sit_blk_cnt);
 }
 
 static void init_free_segmap(struct f2fs_sb_info *sbi)
@@ -1625,6 +1832,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
 {
        struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+       dev_t dev = sbi->sb->s_bdev->bd_dev;
        struct f2fs_sm_info *sm_info;
        int err;
 
@@ -1643,7 +1851,24 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
        sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
        sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
        sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
-       sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS;
+       sm_info->rec_prefree_segments = sm_info->main_segments *
+                                       DEF_RECLAIM_PREFREE_SEGMENTS / 100;
+       sm_info->ipu_policy = F2FS_IPU_DISABLE;
+       sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
+
+       INIT_LIST_HEAD(&sm_info->discard_list);
+       sm_info->nr_discards = 0;
+       sm_info->max_discards = 0;
+
+       if (test_opt(sbi, FLUSH_MERGE)) {
+               spin_lock_init(&sm_info->issue_lock);
+               init_waitqueue_head(&sm_info->flush_wait_queue);
+
+               sm_info->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
+                               "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
+               if (IS_ERR(sm_info->f2fs_issue_flush))
+                       return PTR_ERR(sm_info->f2fs_issue_flush);
+       }
 
        err = build_sit_info(sbi);
        if (err)
@@ -1753,6 +1978,8 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
        struct f2fs_sm_info *sm_info = SM_I(sbi);
        if (!sm_info)
                return;
+       if (sm_info->f2fs_issue_flush)
+               kthread_stop(sm_info->f2fs_issue_flush);
        destroy_dirty_segmap(sbi);
        destroy_curseg(sbi);
        destroy_free_segmap(sbi);
@@ -1760,3 +1987,24 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
        sbi->sm_info = NULL;
        kfree(sm_info);
 }
+
+int __init create_segment_manager_caches(void)
+{
+       discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
+                       sizeof(struct discard_entry));
+       if (!discard_entry_slab)
+               return -ENOMEM;
+       flush_cmd_slab = f2fs_kmem_cache_create("flush_command",
+                       sizeof(struct flush_cmd));
+       if (!flush_cmd_slab) {
+               kmem_cache_destroy(discard_entry_slab);
+               return -ENOMEM;
+       }
+       return 0;
+}
+
+void destroy_segment_manager_caches(void)
+{
+       kmem_cache_destroy(discard_entry_slab);
+       kmem_cache_destroy(flush_cmd_slab);
+}