/*
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define __reverse_ffz(x) __reverse_ffs(~(x))

static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *inmem_entry_slab;

/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
	if ((word & 0xffffffff) == 0) {
		num += 32;
		word >>= 32;
	}
#endif
	if ((word & 0xffff) == 0) {
		num += 16;
		word >>= 16;
	}
	if ((word & 0xff) == 0) {
		num += 8;
		word >>= 8;
	}
	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;
	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;
	if ((word & 0x2) == 0)
		num += 1;
	return num;
}

/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * Example:
 *                             LSB <--> MSB
 *   f2fs_set_bit(0, bitmap) => 0000 0001
 *   f2fs_set_bit(7, bitmap) => 1000 0000
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	while (offset < size && !f2fs_test_bit(offset, (unsigned char *)addr))
		offset++;

	if (offset > size)
		offset = size;

	return offset;
#if 0
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = offset & ~(BITS_PER_LONG - 1);
	unsigned long tmp;
	unsigned long mask, submask;
	unsigned long quot, rest;

	if (offset >= size)
		return size;

	size -= result;
	offset %= BITS_PER_LONG;
	if (!offset)
		goto aligned;

	tmp = *(p++);
	quot = (offset >> 3) << 3;
	rest = offset & 0x7;
	mask = ~0UL << quot;
	submask = (unsigned char)(0xff << rest) >> rest;
	submask <<= quot;
	mask &= submask;
	tmp &= mask;
	if (size < BITS_PER_LONG)
		goto found_first;
	if (tmp)
		goto found_middle;

	size -= BITS_PER_LONG;
	result += BITS_PER_LONG;
aligned:
	while (size & ~(BITS_PER_LONG-1)) {
		tmp = *(p++);
		if (tmp)
			goto found_middle;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
	}
	if (!size)
		return result;
	tmp = *p;
found_first:
	tmp &= (~0UL >> (BITS_PER_LONG - size));
	if (tmp == 0UL)		/* Are any bits set? */
		return result + size;	/* Nope. */
found_middle:
	return result + __reverse_ffs(tmp);
#endif
}

static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	while (offset < size && f2fs_test_bit(offset, (unsigned char *)addr))
		offset++;

	if (offset > size)
		offset = size;

	return offset;
#if 0
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = offset & ~(BITS_PER_LONG - 1);
	unsigned long tmp;
	unsigned long mask, submask;
	unsigned long quot, rest;

	if (offset >= size)
		return size;

	size -= result;
	offset %= BITS_PER_LONG;
	if (!offset)
		goto aligned;

	tmp = *(p++);
	quot = (offset >> 3) << 3;
	rest = offset & 0x7;
	mask = ~(~0UL << quot);
	submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
	submask <<= quot;
	mask += submask;
	tmp |= mask;
	if (size < BITS_PER_LONG)
		goto found_first;
	if (~tmp)
		goto found_middle;

	size -= BITS_PER_LONG;
	result += BITS_PER_LONG;
aligned:
	while (size & ~(BITS_PER_LONG - 1)) {
		tmp = *(p++);
		if (~tmp)
			goto found_middle;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
	}
	if (!size)
		return result;
	tmp = *p;

found_first:
	tmp |= ~0UL << size;
	if (tmp == ~0UL)	/* Are any bits zero? */
		return result + size;	/* Nope. */
found_middle:
	return result + __reverse_ffz(tmp);
#endif
}

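/*
 * Register a page that belongs to an in-flight atomic write. The page is
 * tagged with ATOMIC_WRITTEN_PAGE, pinned with an extra reference, and
 * queued on the per-inode inmem_pages list until commit_inmem_pages()
 * either writes it back or drops it.
 */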
void register_inmem_page(struct inode *inode, struct page *page)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *new;

	f2fs_trace_pid(page);

	set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
	SetPagePrivate(page);

	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);

	/* add atomic page indices to the list */
	new->page = page;
	INIT_LIST_HEAD(&new->list);

	/* increase reference count with clean state */
	mutex_lock(&fi->inmem_lock);
	get_page(page);
	list_add_tail(&new->list, &fi->inmem_pages);
	inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	mutex_unlock(&fi->inmem_lock);

	trace_f2fs_register_inmem_page(page, INMEM);
}

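/*
 * Write back (or, when abort is true, drop) every page registered by
 * register_inmem_page(). Committed pages are written through
 * do_write_data_page() under f2fs_lock_op(), and the merged bio is
 * submitted once after the whole list has been walked.
 */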
int commit_inmem_pages(struct inode *inode, bool abort)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *cur, *tmp;
	bool submit_bio = false;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = DATA,
		.rw = WRITE_SYNC | REQ_PRIO,
		.encrypted_page = NULL,
	};
	int err = 0;

	/*
	 * abort is true only when f2fs_evict_inode is called.
	 * Basically, f2fs_evict_inode doesn't produce any data writes, so
	 * we don't need to call f2fs_balance_fs.
	 * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this
	 * inode becomes free by iget_locked in f2fs_iget.
	 */
	if (!abort) {
		f2fs_balance_fs(sbi);
		f2fs_lock_op(sbi);
	}

	mutex_lock(&fi->inmem_lock);
	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
		lock_page(cur->page);
		if (!abort) {
			if (cur->page->mapping == inode->i_mapping) {
				set_page_dirty(cur->page);
				f2fs_wait_on_page_writeback(cur->page, DATA);
				if (clear_page_dirty_for_io(cur->page))
					inode_dec_dirty_pages(inode);
				trace_f2fs_commit_inmem_page(cur->page, INMEM);
				fio.page = cur->page;
				err = do_write_data_page(&fio);
				submit_bio = true;
				if (err) {
					unlock_page(cur->page);
					break;
				}
			}
		} else {
			trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
		}
		set_page_private(cur->page, 0);
		ClearPagePrivate(cur->page);
		f2fs_put_page(cur->page, 1);

		list_del(&cur->list);
		kmem_cache_free(inmem_entry_slab, cur);
		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	}
	mutex_unlock(&fi->inmem_lock);

	if (!abort) {
		f2fs_unlock_op(sbi);
		if (submit_bio)
			f2fs_submit_merged_bio(sbi, DATA, WRITE);
	}
	return err;
}

/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi)
{
	/*
	 * We should do GC or end up with a checkpoint if there are too many
	 * dirty dir/node pages without enough free segments.
	 */
	if (has_not_enough_free_secs(sbi, 0)) {
		mutex_lock(&sbi->gc_mutex);
		f2fs_gc(sbi);
	}
}

void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
	/* try to shrink the extent cache when there is not enough memory */
	if (!available_free_memory(sbi, EXTENT_CACHE))
		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);

	/* check the # of cached NAT entries */
	if (!available_free_memory(sbi, NAT_ENTRIES))
		try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);

	if (!available_free_memory(sbi, FREE_NIDS))
		try_to_free_nids(sbi, NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES);

	/* checkpoint is the only way to shrink partially cached entries */
	if (!available_free_memory(sbi, NAT_ENTRIES) ||
			excess_prefree_segs(sbi) ||
			!available_free_memory(sbi, INO_ENTRIES))
		f2fs_sync_fs(sbi->sb, true);
}

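/*
 * Flush-merge worker: batch all queued flush commands, issue a single
 * WRITE_FLUSH bio for the whole batch, and complete every waiter with
 * the result.
 */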
static int issue_flush_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
	wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
	if (kthread_should_stop())
		return 0;

	if (!llist_empty(&fcc->issue_list)) {
		struct bio *bio;
		struct flush_cmd *cmd, *next;
		int ret;

		bio = f2fs_bio_alloc(0);

		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

		bio->bi_bdev = sbi->sb->s_bdev;
		ret = submit_bio_wait(WRITE_FLUSH, bio);

		llist_for_each_entry_safe(cmd, next,
					  fcc->dispatch_list, llnode) {
			cmd->ret = ret;
			complete(&cmd->wait);
		}
		bio_put(bio);
		fcc->dispatch_list = NULL;
	}

	wait_event_interruptible(*q,
		kthread_should_stop() || !llist_empty(&fcc->issue_list));
	goto repeat;
}

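/*
 * Issue a cache flush on behalf of fsync. With NOBARRIER the flush is
 * skipped entirely; without FLUSH_MERGE a flush bio is submitted
 * synchronously; otherwise the request is queued for the flush-merge
 * thread and the caller sleeps until it completes.
 */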
int f2fs_issue_flush(struct f2fs_sb_info *sbi)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
	struct flush_cmd cmd;

	trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
					test_opt(sbi, FLUSH_MERGE));

	if (test_opt(sbi, NOBARRIER))
		return 0;

	if (!test_opt(sbi, FLUSH_MERGE)) {
		struct bio *bio = f2fs_bio_alloc(0);
		int ret;

		bio->bi_bdev = sbi->sb->s_bdev;
		ret = submit_bio_wait(WRITE_FLUSH, bio);
		bio_put(bio);
		return ret;
	}

	init_completion(&cmd.wait);

	llist_add(&cmd.llnode, &fcc->issue_list);

	if (!fcc->dispatch_list)
		wake_up(&fcc->flush_wait_queue);

	wait_for_completion(&cmd.wait);

	return cmd.ret;
}

int create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct flush_cmd_control *fcc;
	int err = 0;

	fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
	if (!fcc)
		return -ENOMEM;
	init_waitqueue_head(&fcc->flush_wait_queue);
	init_llist_head(&fcc->issue_list);
	SM_I(sbi)->cmd_control_info = fcc;
	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(fcc->f2fs_issue_flush)) {
		err = PTR_ERR(fcc->f2fs_issue_flush);
		kfree(fcc);
		SM_I(sbi)->cmd_control_info = NULL;
		return err;
	}

	return err;
}

void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;

	if (fcc && fcc->f2fs_issue_flush)
		kthread_stop(fcc->f2fs_issue_flush);
	kfree(fcc);
	SM_I(sbi)->cmd_control_info = NULL;
}

static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;
	}
}

static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;

		if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
			clear_bit(GET_SECNO(sbi, segno),
						dirty_i->victim_secmap);
	}
}

/*
 * This should not fail with an error such as -ENOMEM.
 * Adding a dirty entry into the seglist is not a critical operation.
 * If a given segment is one of the current working segments, it won't be added.
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned short valid_blocks;

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	mutex_lock(&dirty_i->seglist_lock);

	valid_blocks = get_valid_blocks(sbi, segno, 0);

	if (valid_blocks == 0) {
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < sbi->blocks_per_seg) {
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* Recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}

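/*
 * Send a discard for the block range [blkstart, blkstart + blklen) to the
 * underlying device, marking each block in the per-segment discard_map on
 * the way so it is not discarded twice.
 */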
static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
				block_t blkstart, block_t blklen)
{
	sector_t start = SECTOR_FROM_BLOCK(blkstart);
	sector_t len = SECTOR_FROM_BLOCK(blklen);
	struct seg_entry *se;
	unsigned int offset;
	block_t i;

	for (i = blkstart; i < blkstart + blklen; i++) {
		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
		offset = GET_BLKOFF_FROM_SEG0(sbi, i);

		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;
	}
	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
	return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
}

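/*
 * Discard the block reserved for the next free dnode. Returns true if the
 * block could not be discarded and had to be zeroed out via a meta page
 * instead.
 */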
bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int err = -ENOTSUPP;

	if (test_opt(sbi, DISCARD)) {
		struct seg_entry *se = get_seg_entry(sbi,
				GET_SEGNO(sbi, blkaddr));
		unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

		if (f2fs_test_bit(offset, se->discard_map))
			return false;

		err = f2fs_issue_discard(sbi, blkaddr, 1);
	}

	if (err) {
		update_meta_page(sbi, NULL, blkaddr);
		return true;
	}
	return false;
}

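/*
 * Queue the candidate range [start, end) of the segment being trimmed as a
 * discard entry, merging it into the previous entry when the two ranges
 * are block-contiguous.
 */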
static void __add_discard_entry(struct f2fs_sb_info *sbi,
		struct cp_control *cpc, struct seg_entry *se,
		unsigned int start, unsigned int end)
{
	struct list_head *head = &SM_I(sbi)->discard_list;
	struct discard_entry *new, *last;

	if (!list_empty(head)) {
		last = list_last_entry(head, struct discard_entry, list);
		if (START_BLOCK(sbi, cpc->trim_start) + start ==
						last->blkaddr + last->len) {
			last->len += end - start;
			goto done;
		}
	}

	new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
	INIT_LIST_HEAD(&new->list);
	new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
	new->len = end - start;
	list_add_tail(&new->list, head);
done:
	SM_I(sbi)->nr_discards += end - start;
}

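/*
 * Collect discardable blocks of the segment at cpc->trim_start: for FITRIM
 * (CP_DISCARD) every block that is neither checkpointed nor already
 * discarded, otherwise the blocks that became invalid since the last
 * checkpoint.
 */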
static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	int max_blocks = sbi->blocks_per_seg;
	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *discard_map = (unsigned long *)se->discard_map;
	unsigned long *dmap = SIT_I(sbi)->tmp_map;
	unsigned int start = 0, end = -1;
	bool force = (cpc->reason == CP_DISCARD);
	int i;

	if (se->valid_blocks == max_blocks)
		return;

	if (!force) {
		if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
		    SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)
			return;
	}

	/* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
	for (i = 0; i < entries; i++)
		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];

	while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
		if (start >= max_blocks)
			break;

		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
		__add_discard_entry(sbi, cpc, se, start, end);
	}
}

void release_discard_addrs(struct f2fs_sb_info *sbi)
{
	struct list_head *head = &(SM_I(sbi)->discard_list);
	struct discard_entry *entry, *this;

	/* drop caches */
	list_for_each_entry_safe(entry, this, head, list) {
		list_del(&entry->list);
		kmem_cache_free(discard_entry_slab, entry);
	}
}

/*
 * Should call clear_prefree_segments after checkpoint is done.
 */
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
		__set_test_and_free(sbi, segno);
	mutex_unlock(&dirty_i->seglist_lock);
}

void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct list_head *head = &(SM_I(sbi)->discard_list);
	struct discard_entry *entry, *this;
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
	unsigned int start = 0, end = -1;

	mutex_lock(&dirty_i->seglist_lock);

	while (1) {
		int i;
		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
		if (start >= MAIN_SEGS(sbi))
			break;
		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
								start + 1);

		for (i = start; i < end; i++)
			clear_bit(i, prefree_map);

		dirty_i->nr_dirty[PRE] -= end - start;

		if (!test_opt(sbi, DISCARD))
			continue;

		f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
				(end - start) << sbi->log_blocks_per_seg);
	}
	mutex_unlock(&dirty_i->seglist_lock);

	/* send small discards */
	list_for_each_entry_safe(entry, this, head, list) {
		if (cpc->reason == CP_DISCARD && entry->len < cpc->trim_minlen)
			goto skip;
		f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
		cpc->trimmed += entry->len;
skip:
		list_del(&entry->list);
		SM_I(sbi)->nr_discards -= entry->len;
		kmem_cache_free(discard_entry_slab, entry);
	}
}

static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);

	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
		sit_i->dirty_sentries++;
		return false;
	}

	return true;
}

static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
					unsigned int segno, int modified)
{
	struct seg_entry *se = get_seg_entry(sbi, segno);
	se->type = type;
	if (modified)
		__mark_sit_entry_dirty(sbi, segno);
}

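/*
 * Apply a validity change (del, typically +1 or -1) for blkaddr: update
 * the owning segment's valid block count, mtime, valid/discard bitmaps and
 * checkpointed block count, then mark its SIT entry dirty.
 */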
static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
	struct seg_entry *se;
	unsigned int segno, offset;
	long int new_vblocks;

	segno = GET_SEGNO(sbi, blkaddr);

	se = get_seg_entry(sbi, segno);
	new_vblocks = se->valid_blocks + del;
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
				(new_vblocks > sbi->blocks_per_seg)));

	se->valid_blocks = new_vblocks;
	se->mtime = get_mtime(sbi);
	SIT_I(sbi)->max_mtime = se->mtime;

	/* Update valid block bitmap */
	if (del > 0) {
		if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
			f2fs_bug_on(sbi, 1);
		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;
	} else {
		if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
			f2fs_bug_on(sbi, 1);
		if (f2fs_test_and_clear_bit(offset, se->discard_map))
			sbi->discard_blks++;
	}
	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
		se->ckpt_valid_blocks += del;

	__mark_sit_entry_dirty(sbi, segno);

	/* update total number of valid blocks to be written in ckpt area */
	SIT_I(sbi)->written_valid_blocks += del;

	if (sbi->segs_per_sec > 1)
		get_sec_entry(sbi, segno)->valid_blocks += del;
}

void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
{
	update_sit_entry(sbi, new, 1);
	if (GET_SEGNO(sbi, old) != NULL_SEGNO)
		update_sit_entry(sbi, old, -1);

	locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
}

void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
{
	unsigned int segno = GET_SEGNO(sbi, addr);
	struct sit_info *sit_i = SIT_I(sbi);

	f2fs_bug_on(sbi, addr == NULL_ADDR);
	if (addr == NEW_ADDR)
		return;

	/* add it into sit main buffer */
	mutex_lock(&sit_i->sentry_lock);

	update_sit_entry(sbi, addr, -1);

	/* add it into dirty seglist */
	locate_dirty_segment(sbi, segno);

	mutex_unlock(&sit_i->sentry_lock);
}

/*
 * This function must be called under the curseg_mutex lock.
 */
static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
					struct f2fs_summary *sum)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	void *addr = curseg->sum_blk;
	addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
	memcpy(addr, sum, sizeof(struct f2fs_summary));
}

/*
 * Calculate the number of current summary pages for writing
 */
int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
{
	int valid_sum_count = 0;
	int i, sum_in_page;

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		if (sbi->ckpt->alloc_type[i] == SSR)
			valid_sum_count += sbi->blocks_per_seg;
		else {
			if (for_ra)
				valid_sum_count += le16_to_cpu(
					F2FS_CKPT(sbi)->cur_data_blkoff[i]);
			else
				valid_sum_count += curseg_blkoff(sbi, i);
		}
	}

	sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
	if (valid_sum_count <= sum_in_page)
		return 1;
	else if ((valid_sum_count - sum_in_page) <=
		(PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
		return 2;
	return 3;
}

/*
 * Caller should put this summary page
 */
struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
}

void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
{
	struct page *page = grab_meta_page(sbi, blk_addr);
	void *dst = page_address(page);

	if (src)
		memcpy(dst, src, PAGE_CACHE_SIZE);
	else
		memset(dst, 0, PAGE_CACHE_SIZE);
	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

static void write_sum_page(struct f2fs_sb_info *sbi,
			struct f2fs_summary_block *sum_blk, block_t blk_addr)
{
	update_meta_page(sbi, (void *)sum_blk, blk_addr);
}

static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno + 1;
	struct free_segmap_info *free_i = FREE_I(sbi);

	if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
		return !test_bit(segno, free_i->free_segmap);
	return 0;
}

/*
 * Find a new segment in the free segment bitmap in the right allocation
 * order. This function must succeed; otherwise it is a BUG.
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
	unsigned int hint = *newseg / sbi->segs_per_sec;
	unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
	unsigned int left_start = hint;
	bool init = true;
	int go_left = 0;
	int i;

	spin_lock(&free_i->segmap_lock);

	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
		segno = find_next_zero_bit(free_i->free_segmap,
					MAIN_SEGS(sbi), *newseg + 1);
		if (segno - *newseg < sbi->segs_per_sec -
					(*newseg % sbi->segs_per_sec))
			goto got_it;
	}
find_other_zone:
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
	if (secno >= MAIN_SECS(sbi)) {
		if (dir == ALLOC_RIGHT) {
			secno = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
		} else {
			go_left = 1;
			left_start = hint - 1;
		}
	}
	if (go_left == 0)
		goto skip_left;

	while (test_bit(left_start, free_i->free_secmap)) {
		if (left_start > 0) {
			left_start--;
			continue;
		}
		left_start = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
		break;
	}
	secno = left_start;
skip_left:
	hint = secno;
	segno = secno * sbi->segs_per_sec;
	zoneno = secno / sbi->secs_per_zone;

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	if (dir == ALLOC_LEFT) {
		if (!go_left && zoneno + 1 >= total_zones)
			goto got_it;
		if (go_left && zoneno == 0)
			goto got_it;
	}
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in use, try another */
		if (go_left)
			hint = zoneno * sbi->secs_per_zone - 1;
		else if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
	__set_inuse(sbi, segno);
	*newseg = segno;
	spin_unlock(&free_i->segmap_lock);
}

static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct summary_footer *sum_footer;

	curseg->segno = curseg->next_segno;
	curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
	curseg->next_blkoff = 0;
	curseg->next_segno = NULL_SEGNO;

	sum_footer = &(curseg->sum_blk->footer);
	memset(sum_footer, 0, sizeof(struct summary_footer));
	if (IS_DATASEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
	if (IS_NODESEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
	__set_sit_entry_type(sbi, type, curseg->segno, modified);
}

/*
 * Allocate a current working segment.
 * This function always allocates a free segment in LFS manner.
 */
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno;
	int dir = ALLOC_LEFT;

	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, segno));
	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
		dir = ALLOC_RIGHT;

	if (test_opt(sbi, NOHEAP))
		dir = ALLOC_RIGHT;

	get_new_segment(sbi, &segno, new_sec, dir);
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 1);
	curseg->alloc_type = LFS;
}

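/*
 * Find the first block offset at or after start that is free in both the
 * current and the checkpointed validity bitmaps; used to pick the next
 * target block of an SSR segment.
 */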
static void __next_free_blkoff(struct f2fs_sb_info *sbi,
			struct curseg_info *seg, block_t start)
{
	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	unsigned long *target_map = SIT_I(sbi)->tmp_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	int i, pos;

	for (i = 0; i < entries; i++)
		target_map[i] = ckpt_map[i] | cur_map[i];

	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);

	seg->next_blkoff = pos;
}

/*
 * If a segment is written in LFS manner, the next block offset is simply
 * obtained by increasing the current block offset. However, if a segment is
 * written in SSR manner, the next block offset must be obtained by calling
 * __next_free_blkoff.
 */
static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
				struct curseg_info *seg)
{
	if (seg->alloc_type == SSR)
		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
	else
		seg->next_blkoff++;
}

/*
 * This function always allocates a used segment (from the dirty seglist) in
 * SSR manner, so it needs to recover the existing segment information of
 * valid blocks.
 */
static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int new_segno = curseg->next_segno;
	struct f2fs_summary_block *sum_node;
	struct page *sum_page;

	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, curseg->segno));
	__set_test_and_inuse(sbi, new_segno);

	mutex_lock(&dirty_i->seglist_lock);
	__remove_dirty_segment(sbi, new_segno, PRE);
	__remove_dirty_segment(sbi, new_segno, DIRTY);
	mutex_unlock(&dirty_i->seglist_lock);

	reset_curseg(sbi, type, 1);
	curseg->alloc_type = SSR;
	__next_free_blkoff(sbi, curseg, 0);

	if (reuse) {
		sum_page = get_sum_page(sbi, new_segno);
		sum_node = (struct f2fs_summary_block *)page_address(sum_page);
		memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
		f2fs_put_page(sum_page, 1);
	}
}

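/*
 * Pick a victim segment for SSR allocation. When free sections are running
 * out, data segments also try the remaining data logs instead of only
 * their own type.
 */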
static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;

	if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
		return v_ops->get_victim(sbi,
				&(curseg)->next_segno, BG_GC, type, SSR);

	/* For data segments, let's do SSR more intensively */
	for (; type >= CURSEG_HOT_DATA; type--)
		if (v_ops->get_victim(sbi, &(curseg)->next_segno,
						BG_GC, type, SSR))
			return 1;
	return 0;
}

/*
 * Flush out the current segment and replace it with a new segment.
 * This function must succeed; otherwise it is a BUG.
 */
static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
						int type, bool force)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	if (force)
		new_curseg(sbi, type, true);
	else if (type == CURSEG_WARM_NODE)
		new_curseg(sbi, type, false);
	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
		new_curseg(sbi, type, false);
	else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
		change_curseg(sbi, type, true);
	else
		new_curseg(sbi, type, false);

	stat_inc_seg_type(sbi, curseg);
}

static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int old_segno;

	old_segno = curseg->segno;
	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
	locate_dirty_segment(sbi, old_segno);
}

void allocate_new_segments(struct f2fs_sb_info *sbi)
{
	int i;

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
		__allocate_new_segments(sbi, i);
}

static const struct segment_allocation default_salloc_ops = {
	.allocate_segment = allocate_segment_by_default,
};

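/*
 * FITRIM entry point: walk the requested block range in batches of
 * BATCHED_TRIM_SEGMENTS and trigger a CP_DISCARD checkpoint for each
 * batch, so discards are issued against a consistent on-disk state.
 */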
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
	__u64 start = F2FS_BYTES_TO_BLK(range->start);
	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
	unsigned int start_segno, end_segno;
	struct cp_control cpc;

	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
		return -EINVAL;

	cpc.trimmed = 0;
	if (end <= MAIN_BLKADDR(sbi))
		goto out;

	/* start/end segment number in main_area */
	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
						GET_SEGNO(sbi, end);
	cpc.reason = CP_DISCARD;
	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));

	/* do checkpoint to issue discard commands safely */
	for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
		cpc.trim_start = start_segno;

		if (sbi->discard_blks == 0)
			break;
		else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
			cpc.trim_end = end_segno;
		else
			cpc.trim_end = min_t(unsigned int,
				rounddown(start_segno +
				BATCHED_TRIM_SEGMENTS(sbi),
				sbi->segs_per_sec) - 1, end_segno);

		mutex_lock(&sbi->gc_mutex);
		write_checkpoint(sbi, &cpc);
		mutex_unlock(&sbi->gc_mutex);
	}
out:
	range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
	return 0;
}

static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	if (curseg->next_blkoff < sbi->blocks_per_seg)
		return true;
	return false;
}

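/*
 * Log selection by temperature: depending on the number of active logs
 * (2, 4 or 6), map a page to one of the hot/warm/cold data or node
 * current segments.
 */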
static int __get_segment_type_2(struct page *page, enum page_type p_type)
{
	if (p_type == DATA)
		return CURSEG_HOT_DATA;
	else
		return CURSEG_HOT_NODE;
}

static int __get_segment_type_4(struct page *page, enum page_type p_type)
{
	if (p_type == DATA) {
		struct inode *inode = page->mapping->host;

		if (S_ISDIR(inode->i_mode))
			return CURSEG_HOT_DATA;
		else
			return CURSEG_COLD_DATA;
	} else {
		if (IS_DNODE(page) && is_cold_node(page))
			return CURSEG_WARM_NODE;
		else
			return CURSEG_COLD_NODE;
	}
}

static int __get_segment_type_6(struct page *page, enum page_type p_type)
{
	if (p_type == DATA) {
		struct inode *inode = page->mapping->host;

		if (S_ISDIR(inode->i_mode))
			return CURSEG_HOT_DATA;
		else if (is_cold_data(page) || file_is_cold(inode))
			return CURSEG_COLD_DATA;
		else
			return CURSEG_WARM_DATA;
	} else {
		if (IS_DNODE(page))
			return is_cold_node(page) ? CURSEG_WARM_NODE :
						CURSEG_HOT_NODE;
		else
			return CURSEG_COLD_NODE;
	}
}

static int __get_segment_type(struct page *page, enum page_type p_type)
{
	switch (F2FS_P_SB(page)->active_logs) {
	case 2:
		return __get_segment_type_2(page, p_type);
	case 4:
		return __get_segment_type_4(page, p_type);
	}
	/* NR_CURSEG_TYPE(6) logs by default */
	f2fs_bug_on(F2FS_P_SB(page),
		F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
	return __get_segment_type_6(page, p_type);
}

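/*
 * Core block allocator: reserve the next free block of the current
 * segment for type, record the summary entry, advance next_blkoff
 * (replacing the current segment when it fills up), and refresh the SIT
 * entries of both the old and the new block address.
 */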
void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
		block_t old_blkaddr, block_t *new_blkaddr,
		struct f2fs_summary *sum, int type)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	bool direct_io = (type == CURSEG_DIRECT_IO);

	type = direct_io ? CURSEG_WARM_DATA : type;

	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	/* direct_io'ed data is aligned to the segment for better performance */
	if (direct_io && curseg->next_blkoff &&
				!has_not_enough_free_secs(sbi, 0))
		__allocate_new_segments(sbi, type);

	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	/*
	 * __add_sum_entry must be called under the curseg_mutex because this
	 * function updates a summary entry in the current summary block.
	 */
	__add_sum_entry(sbi, type, sum);

	__refresh_next_blkoff(sbi, curseg);

	stat_inc_block_count(sbi, curseg);

	if (!__has_curseg_space(sbi, type))
		sit_i->s_ops->allocate_segment(sbi, type, false);
	/*
	 * SIT information should be updated before segment allocation,
	 * since SSR needs the latest valid block information.
	 */
	refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);

	mutex_unlock(&sit_i->sentry_lock);

	if (page && IS_NODESEG(type))
		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));

	mutex_unlock(&curseg->curseg_mutex);
}

static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
	int type = __get_segment_type(fio->page, fio->type);

	allocate_data_block(fio->sbi, fio->page, fio->blk_addr,
					&fio->blk_addr, sum, type);

	/* writeout dirty page into bdev */
	f2fs_submit_page_mbio(fio);
}

void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
{
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.rw = WRITE_SYNC | REQ_META | REQ_PRIO,
		.blk_addr = page->index,
		.page = page,
		.encrypted_page = NULL,
	};

	set_page_writeback(page);
	f2fs_submit_page_mbio(&fio);
}

void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
{
	struct f2fs_summary sum;

	set_summary(&sum, nid, 0, 0);
	do_write_page(&sum, fio);
}

void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	struct f2fs_summary sum;
	struct node_info ni;

	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
	get_node_info(sbi, dn->nid, &ni);
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
	do_write_page(&sum, fio);
	dn->data_blkaddr = fio->blk_addr;
}

void rewrite_data_page(struct f2fs_io_info *fio)
{
	stat_inc_inplace_blocks(fio->sbi);
	f2fs_submit_page_mbio(fio);
}

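/*
 * Rewire a block during recovery or block migration: temporarily switch
 * the current segment to the one owning new_blkaddr, record the summary
 * at its exact offset, update SIT for both addresses and, if
 * recover_curseg is set, restore the previous current segment.
 */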
static void __f2fs_replace_block(struct f2fs_sb_info *sbi,
				struct f2fs_summary *sum,
				block_t old_blkaddr, block_t new_blkaddr,
				bool recover_curseg)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	unsigned int segno, old_cursegno;
	struct seg_entry *se;
	int type;
	unsigned short old_blkoff;

	segno = GET_SEGNO(sbi, new_blkaddr);
	se = get_seg_entry(sbi, segno);
	type = se->type;

	if (!recover_curseg) {
		/* for recovery flow */
		if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
			if (old_blkaddr == NULL_ADDR)
				type = CURSEG_COLD_DATA;
			else
				type = CURSEG_WARM_DATA;
		}
	} else {
		if (!IS_CURSEG(sbi, segno))
			type = CURSEG_WARM_DATA;
	}

	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	old_cursegno = curseg->segno;
	old_blkoff = curseg->next_blkoff;

	/* change the current segment */
	if (segno != curseg->segno) {
		curseg->next_segno = segno;
		change_curseg(sbi, type, true);
	}

	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
	__add_sum_entry(sbi, type, sum);

	refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
	locate_dirty_segment(sbi, old_cursegno);

	if (recover_curseg) {
		if (old_cursegno != curseg->segno) {
			curseg->next_segno = old_cursegno;
			change_curseg(sbi, type, true);
		}
		curseg->next_blkoff = old_blkoff;
	}

	mutex_unlock(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);
}

void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
				block_t old_addr, block_t new_addr,
				unsigned char version, bool recover_curseg)
{
	struct f2fs_summary sum;

	set_summary(&sum, dn->nid, dn->ofs_in_node, version);

	__f2fs_replace_block(sbi, &sum, old_addr, new_addr, recover_curseg);

	dn->data_blkaddr = new_addr;
	set_data_blkaddr(dn);
	f2fs_update_extent_cache(dn);
}

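/*
 * Check whether page is still sitting in the merged write bio of its
 * type; for encrypted pages the control page stored in page_private is
 * compared instead.
 */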
static inline bool is_merged_page(struct f2fs_sb_info *sbi,
					struct page *page, enum page_type type)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = &sbi->write_io[btype];
	struct bio_vec *bvec;
	struct page *target;
	int i;

	down_read(&io->io_rwsem);
	if (!io->bio) {
		up_read(&io->io_rwsem);
		return false;
	}

	bio_for_each_segment_all(bvec, io->bio, i) {

		if (bvec->bv_page->mapping) {
			target = bvec->bv_page;
		} else {
			struct f2fs_crypto_ctx *ctx;

			/* encrypted page */
			ctx = (struct f2fs_crypto_ctx *)page_private(
								bvec->bv_page);
			target = ctx->w.control_page;
		}

		if (page == target) {
			up_read(&io->io_rwsem);
			return true;
		}
	}

	up_read(&io->io_rwsem);
	return false;
}

void f2fs_wait_on_page_writeback(struct page *page,
				enum page_type type)
{
	if (PageWriteback(page)) {
		struct f2fs_sb_info *sbi = F2FS_P_SB(page);

		if (is_merged_page(sbi, page, type))
			f2fs_submit_merged_bio(sbi, type, WRITE);
		wait_on_page_writeback(page);
	}
}

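/*
 * Rebuild the data cursegs from a compacted summary area: the first block
 * carries the NAT and SIT journals, followed by the packed summary
 * entries of the three data logs.
 */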
1452 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
1453 {
1454         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1455         struct curseg_info *seg_i;
1456         unsigned char *kaddr;
1457         struct page *page;
1458         block_t start;
1459         int i, j, offset;
1460
1461         start = start_sum_block(sbi);
1462
1463         page = get_meta_page(sbi, start++);
1464         kaddr = (unsigned char *)page_address(page);
1465
1466         /* Step 1: restore nat cache */
1467         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1468         memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);
1469
1470         /* Step 2: restore sit cache */
1471         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1472         memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
1473                                                 SUM_JOURNAL_SIZE);
1474         offset = 2 * SUM_JOURNAL_SIZE;
1475
1476         /* Step 3: restore summary entries */
1477         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1478                 unsigned short blk_off;
1479                 unsigned int segno;
1480
1481                 seg_i = CURSEG_I(sbi, i);
1482                 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
1483                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
1484                 seg_i->next_segno = segno;
1485                 reset_curseg(sbi, i, 0);
1486                 seg_i->alloc_type = ckpt->alloc_type[i];
1487                 seg_i->next_blkoff = blk_off;
1488
1489                 if (seg_i->alloc_type == SSR)
1490                         blk_off = sbi->blocks_per_seg;
1491
1492                 for (j = 0; j < blk_off; j++) {
1493                         struct f2fs_summary *s;
1494                         s = (struct f2fs_summary *)(kaddr + offset);
1495                         seg_i->sum_blk->entries[j] = *s;
1496                         offset += SUMMARY_SIZE;
1497                         if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1498                                                 SUM_FOOTER_SIZE)
1499                                 continue;
1500
1501                         f2fs_put_page(page, 1);
1502                         page = NULL;
1503
1504                         page = get_meta_page(sbi, start++);
1505                         kaddr = (unsigned char *)page_address(page);
1506                         offset = 0;
1507                 }
1508         }
1509         f2fs_put_page(page, 1);
1510         return 0;
1511 }
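
/*
 * Editor's sketch: the capacity arithmetic behind the Step 3 loop
 * above. Only the first compacted block carries the two journals;
 * every block reserves SUM_FOOTER_SIZE at its tail. The helper name
 * is illustrative, not part of f2fs.
 */
static inline int example_sum_entries_per_blk(bool first_blk)
{
        int used = first_blk ? 2 * SUM_JOURNAL_SIZE : 0;

        return (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE - used) / SUMMARY_SIZE;
}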
1512
1513 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1514 {
1515         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1516         struct f2fs_summary_block *sum;
1517         struct curseg_info *curseg;
1518         struct page *new;
1519         unsigned short blk_off;
1520         unsigned int segno = 0;
1521         block_t blk_addr = 0;
1522
1523         /* get segment number and block addr */
1524         if (IS_DATASEG(type)) {
1525                 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
1526                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
1527                                                         CURSEG_HOT_DATA]);
1528                 if (__exist_node_summaries(sbi))
1529                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
1530                 else
1531                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
1532         } else {
1533                 segno = le32_to_cpu(ckpt->cur_node_segno[type -
1534                                                         CURSEG_HOT_NODE]);
1535                 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
1536                                                         CURSEG_HOT_NODE]);
1537                 if (__exist_node_summaries(sbi))
1538                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
1539                                                         type - CURSEG_HOT_NODE);
1540                 else
1541                         blk_addr = GET_SUM_BLOCK(sbi, segno);
1542         }
1543
1544         new = get_meta_page(sbi, blk_addr);
1545         sum = (struct f2fs_summary_block *)page_address(new);
1546
1547         if (IS_NODESEG(type)) {
1548                 if (__exist_node_summaries(sbi)) {
1549                         struct f2fs_summary *ns = &sum->entries[0];
1550                         int i;
1551                         for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
1552                                 ns->version = 0;
1553                                 ns->ofs_in_node = 0;
1554                         }
1555                 } else {
1556                         int err;
1557
1558                         err = restore_node_summary(sbi, segno, sum);
1559                         if (err) {
1560                                 f2fs_put_page(new, 1);
1561                                 return err;
1562                         }
1563                 }
1564         }
1565
1566         /* restore the uncompleted segment into curseg */
1567         curseg = CURSEG_I(sbi, type);
1568         mutex_lock(&curseg->curseg_mutex);
1569         memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
1570         curseg->next_segno = segno;
1571         reset_curseg(sbi, type, 0);
1572         curseg->alloc_type = ckpt->alloc_type[type];
1573         curseg->next_blkoff = blk_off;
1574         mutex_unlock(&curseg->curseg_mutex);
1575         f2fs_put_page(new, 1);
1576         return 0;
1577 }
1578
1579 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1580 {
1581         int type = CURSEG_HOT_DATA;
1582         int err;
1583
1584         if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1585                 int npages = npages_for_summary_flush(sbi, true);
1586
1587                 if (npages >= 2)
1588                         ra_meta_pages(sbi, start_sum_block(sbi), npages,
1589                                                                 META_CP);
1590
1591                 /* restore the compacted data summaries */
1592                 if (read_compacted_summaries(sbi))
1593                         return -EINVAL;
1594                 type = CURSEG_HOT_NODE;
1595         }
1596
1597         if (__exist_node_summaries(sbi))
1598                 ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
1599                                         NR_CURSEG_TYPE - type, META_CP);
1600
1601         for (; type <= CURSEG_COLD_NODE; type++) {
1602                 err = read_normal_summaries(sbi, type);
1603                 if (err)
1604                         return err;
1605         }
1606
1607         return 0;
1608 }
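
/*
 * Editor's note on restore_curseg_summaries() above: when
 * CP_COMPACT_SUM_FLAG is set, the three data cursegs are rebuilt from
 * the compacted blocks and only the node cursegs go through
 * read_normal_summaries(); otherwise all six cursegs are restored
 * normally, starting from CURSEG_HOT_DATA.
 */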
1609
1610 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
1611 {
1612         struct page *page;
1613         unsigned char *kaddr;
1614         struct f2fs_summary *summary;
1615         struct curseg_info *seg_i;
1616         int written_size = 0;
1617         int i, j;
1618
1619         page = grab_meta_page(sbi, blkaddr++);
1620         kaddr = (unsigned char *)page_address(page);
1621
1622         /* Step 1: write nat cache */
1623         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1624         memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
1625         written_size += SUM_JOURNAL_SIZE;
1626
1627         /* Step 2: write sit cache */
1628         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1629         memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
1630                                                 SUM_JOURNAL_SIZE);
1631         written_size += SUM_JOURNAL_SIZE;
1632
1633         /* Step 3: write summary entries */
1634         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1635                 unsigned short blkoff;
1636                 seg_i = CURSEG_I(sbi, i);
1637                 if (sbi->ckpt->alloc_type[i] == SSR)
1638                         blkoff = sbi->blocks_per_seg;
1639                 else
1640                         blkoff = curseg_blkoff(sbi, i);
1641
1642                 for (j = 0; j < blkoff; j++) {
1643                         if (!page) {
1644                                 page = grab_meta_page(sbi, blkaddr++);
1645                                 kaddr = (unsigned char *)page_address(page);
1646                                 written_size = 0;
1647                         }
1648                         summary = (struct f2fs_summary *)(kaddr + written_size);
1649                         *summary = seg_i->sum_blk->entries[j];
1650                         written_size += SUMMARY_SIZE;
1651
1652                         if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1653                                                         SUM_FOOTER_SIZE)
1654                                 continue;
1655
1656                         set_page_dirty(page);
1657                         f2fs_put_page(page, 1);
1658                         page = NULL;
1659                 }
1660         }
1661         if (page) {
1662                 set_page_dirty(page);
1663                 f2fs_put_page(page, 1);
1664         }
1665 }
1666
1667 static void write_normal_summaries(struct f2fs_sb_info *sbi,
1668                                         block_t blkaddr, int type)
1669 {
1670         int i, end;
1671         if (IS_DATASEG(type))
1672                 end = type + NR_CURSEG_DATA_TYPE;
1673         else
1674                 end = type + NR_CURSEG_NODE_TYPE;
1675
1676         for (i = type; i < end; i++) {
1677                 struct curseg_info *sum = CURSEG_I(sbi, i);
1678                 mutex_lock(&sum->curseg_mutex);
1679                 write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
1680                 mutex_unlock(&sum->curseg_mutex);
1681         }
1682 }
1683
1684 void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1685 {
1686         if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
1687                 write_compacted_summaries(sbi, start_blk);
1688         else
1689                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
1690 }
1691
1692 void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1693 {
1694         write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
1695 }
1696
1697 int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
1698                                         unsigned int val, int alloc)
1699 {
1700         int i;
1701
1702         if (type == NAT_JOURNAL) {
1703                 for (i = 0; i < nats_in_cursum(sum); i++) {
1704                         if (le32_to_cpu(nid_in_journal(sum, i)) == val)
1705                                 return i;
1706                 }
1707                 if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
1708                         return update_nats_in_cursum(sum, 1);
1709         } else if (type == SIT_JOURNAL) {
1710                 for (i = 0; i < sits_in_cursum(sum); i++)
1711                         if (le32_to_cpu(segno_in_journal(sum, i)) == val)
1712                                 return i;
1713                 if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
1714                         return update_sits_in_cursum(sum, 1);
1715         }
1716         return -1;
1717 }
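
/*
 * Editor's sketch: the alloc-on-miss pattern the SIT flush path below
 * relies on. With alloc set, a miss grows the journal and returns the
 * fresh slot index, so a negative return only means "not found and
 * journal full". The helper name is illustrative.
 */
static inline int example_sit_journal_slot(struct f2fs_summary_block *sum,
                                        unsigned int segno)
{
        int offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1);

        if (offset >= 0)
                segno_in_journal(sum, offset) = cpu_to_le32(segno);
        return offset;
}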
1718
1719 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
1720                                         unsigned int segno)
1721 {
1722         return get_meta_page(sbi, current_sit_addr(sbi, segno));
1723 }
1724
1725 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1726                                         unsigned int start)
1727 {
1728         struct sit_info *sit_i = SIT_I(sbi);
1729         struct page *src_page, *dst_page;
1730         pgoff_t src_off, dst_off;
1731         void *src_addr, *dst_addr;
1732
1733         src_off = current_sit_addr(sbi, start);
1734         dst_off = next_sit_addr(sbi, src_off);
1735
1736         /* get current sit block page without lock */
1737         src_page = get_meta_page(sbi, src_off);
1738         dst_page = grab_meta_page(sbi, dst_off);
1739         f2fs_bug_on(sbi, PageDirty(src_page));
1740
1741         src_addr = page_address(src_page);
1742         dst_addr = page_address(dst_page);
1743         memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
1744
1745         set_page_dirty(dst_page);
1746         f2fs_put_page(src_page, 1);
1747
1748         set_to_next_sit(sit_i, start);
1749
1750         return dst_page;
1751 }
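
/*
 * Editor's note: SIT blocks are double buffered (build_sit_info()
 * halves segment_count_sit). next_sit_addr() targets the alternate
 * copy, and set_to_next_sit() flips the per-block toggle so that
 * current_sit_addr() returns the freshly written copy from then on.
 */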
1752
1753 static struct sit_entry_set *grab_sit_entry_set(void)
1754 {
1755         struct sit_entry_set *ses =
1756                         f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
1757
1758         ses->entry_cnt = 0;
1759         INIT_LIST_HEAD(&ses->set_list);
1760         return ses;
1761 }
1762
1763 static void release_sit_entry_set(struct sit_entry_set *ses)
1764 {
1765         list_del(&ses->set_list);
1766         kmem_cache_free(sit_entry_set_slab, ses);
1767 }
1768
1769 static void adjust_sit_entry_set(struct sit_entry_set *ses,
1770                                                 struct list_head *head)
1771 {
1772         struct sit_entry_set *next = ses;
1773
1774         if (list_is_last(&ses->set_list, head))
1775                 return;
1776
1777         list_for_each_entry_continue(next, head, set_list)
1778                 if (ses->entry_cnt <= next->entry_cnt)
1779                         break;
1780
1781         list_move_tail(&ses->set_list, &next->set_list);
1782 }
1783
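/*
 * Editor's note: adjust_sit_entry_set() above keeps the list sorted by
 * entry_cnt in ascending order, so flush_sit_entries() meets the
 * smallest sets first and can journal them before falling back to SIT
 * pages.
 */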
1784 static void add_sit_entry(unsigned int segno, struct list_head *head)
1785 {
1786         struct sit_entry_set *ses;
1787         unsigned int start_segno = START_SEGNO(segno);
1788
1789         list_for_each_entry(ses, head, set_list) {
1790                 if (ses->start_segno == start_segno) {
1791                         ses->entry_cnt++;
1792                         adjust_sit_entry_set(ses, head);
1793                         return;
1794                 }
1795         }
1796
1797         ses = grab_sit_entry_set();
1798
1799         ses->start_segno = start_segno;
1800         ses->entry_cnt++;
1801         list_add(&ses->set_list, head);
1802 }
1803
1804 static void add_sits_in_set(struct f2fs_sb_info *sbi)
1805 {
1806         struct f2fs_sm_info *sm_info = SM_I(sbi);
1807         struct list_head *set_list = &sm_info->sit_entry_set;
1808         unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
1809         unsigned int segno;
1810
1811         for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
1812                 add_sit_entry(segno, set_list);
1813 }
1814
1815 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
1816 {
1817         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1818         struct f2fs_summary_block *sum = curseg->sum_blk;
1819         int i;
1820
1821         for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
1822                 unsigned int segno;
1823                 bool dirtied;
1824
1825                 segno = le32_to_cpu(segno_in_journal(sum, i));
1826                 dirtied = __mark_sit_entry_dirty(sbi, segno);
1827
1828                 if (!dirtied)
1829                         add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
1830         }
1831         update_sits_in_cursum(sum, -sits_in_cursum(sum));
1832 }
1833
1834 /*
1835  * The checkpoint (CP) procedure calls this to flush dirty SIT entries,
1836  * including those in the sit journal, and to move prefree segments to the free list.
1837  */
1838 void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1839 {
1840         struct sit_info *sit_i = SIT_I(sbi);
1841         unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
1842         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1843         struct f2fs_summary_block *sum = curseg->sum_blk;
1844         struct sit_entry_set *ses, *tmp;
1845         struct list_head *head = &SM_I(sbi)->sit_entry_set;
1846         bool to_journal = true;
1847         struct seg_entry *se;
1848
1849         mutex_lock(&curseg->curseg_mutex);
1850         mutex_lock(&sit_i->sentry_lock);
1851
1852         if (!sit_i->dirty_sentries)
1853                 goto out;
1854
1855         /*
1856          * temporarily collect the sit entries marked in the dirty
1857          * bitmap into sit entry sets, with per-set accounting
1858          */
1859         add_sits_in_set(sbi);
1860
1861         /*
1862          * if there is not enough space in the journal to store all the
1863          * dirty sit entries, remove them from the journal and account
1864          * for them in the sit entry set instead.
1865          */
1866         if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
1867                 remove_sits_in_journal(sbi);
1868
1869         /*
1870          * there are two steps to flush sit entries:
1871          * #1, flush sit entries to journal in current cold data summary block.
1872          * #2, flush sit entries to sit page.
1873          */
1874         list_for_each_entry_safe(ses, tmp, head, set_list) {
1875                 struct page *page = NULL;
1876                 struct f2fs_sit_block *raw_sit = NULL;
1877                 unsigned int start_segno = ses->start_segno;
1878                 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
1879                                                 (unsigned long)MAIN_SEGS(sbi));
1880                 unsigned int segno = start_segno;
1881
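                /*
                 * Editor's note: to_journal is never reset once cleared;
                 * the set list is sorted by entry_cnt (see add_sit_entry),
                 * so a set that overflows the journal implies every later,
                 * larger set would overflow as well.
                 */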
1882                 if (to_journal &&
1883                         !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL))
1884                         to_journal = false;
1885
1886                 if (!to_journal) {
1887                         page = get_next_sit_page(sbi, start_segno);
1888                         raw_sit = page_address(page);
1889                 }
1890
1891                 /* flush dirty sit entries in region of current sit set */
1892                 for_each_set_bit_from(segno, bitmap, end) {
1893                         int offset, sit_offset;
1894
1895                         se = get_seg_entry(sbi, segno);
1896
1897                         /* add discard candidates */
1898                         if (cpc->reason != CP_DISCARD) {
1899                                 cpc->trim_start = segno;
1900                                 add_discard_addrs(sbi, cpc);
1901                         }
1902
1903                         if (to_journal) {
1904                                 offset = lookup_journal_in_cursum(sum,
1905                                                         SIT_JOURNAL, segno, 1);
1906                                 f2fs_bug_on(sbi, offset < 0);
1907                                 segno_in_journal(sum, offset) =
1908                                                         cpu_to_le32(segno);
1909                                 seg_info_to_raw_sit(se,
1910                                                 &sit_in_journal(sum, offset));
1911                         } else {
1912                                 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1913                                 seg_info_to_raw_sit(se,
1914                                                 &raw_sit->entries[sit_offset]);
1915                         }
1916
1917                         __clear_bit(segno, bitmap);
1918                         sit_i->dirty_sentries--;
1919                         ses->entry_cnt--;
1920                 }
1921
1922                 if (!to_journal)
1923                         f2fs_put_page(page, 1);
1924
1925                 f2fs_bug_on(sbi, ses->entry_cnt);
1926                 release_sit_entry_set(ses);
1927         }
1928
1929         f2fs_bug_on(sbi, !list_empty(head));
1930         f2fs_bug_on(sbi, sit_i->dirty_sentries);
1931 out:
1932         if (cpc->reason == CP_DISCARD) {
1933                 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
1934                         add_discard_addrs(sbi, cpc);
1935         }
1936         mutex_unlock(&sit_i->sentry_lock);
1937         mutex_unlock(&curseg->curseg_mutex);
1938
1939         set_prefree_as_free_segments(sbi);
1940 }
1941
1942 static int build_sit_info(struct f2fs_sb_info *sbi)
1943 {
1944         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1945         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1946         struct sit_info *sit_i;
1947         unsigned int sit_segs, start;
1948         char *src_bitmap, *dst_bitmap;
1949         unsigned int bitmap_size;
1950
1951         /* allocate memory for SIT information */
1952         sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
1953         if (!sit_i)
1954                 return -ENOMEM;
1955
1956         SM_I(sbi)->sit_info = sit_i;
1957
1958         sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry));
1959         if (!sit_i->sentries)
1960                 return -ENOMEM;
1961
1962         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1963         sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1964         if (!sit_i->dirty_sentries_bitmap)
1965                 return -ENOMEM;
1966
1967         for (start = 0; start < MAIN_SEGS(sbi); start++) {
1968                 sit_i->sentries[start].cur_valid_map
1969                         = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1970                 sit_i->sentries[start].ckpt_valid_map
1971                         = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1972                 sit_i->sentries[start].discard_map
1973                         = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1974                 if (!sit_i->sentries[start].cur_valid_map ||
1975                                 !sit_i->sentries[start].ckpt_valid_map ||
1976                                 !sit_i->sentries[start].discard_map)
1977                         return -ENOMEM;
1978         }
1979
1980         sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1981         if (!sit_i->tmp_map)
1982                 return -ENOMEM;
1983
1984         if (sbi->segs_per_sec > 1) {
1985                 sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) *
1986                                         sizeof(struct sec_entry));
1987                 if (!sit_i->sec_entries)
1988                         return -ENOMEM;
1989         }
1990
1991         /* get information related to SIT */
1992         sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
1993
1994         /* set up the SIT bitmap from the checkpoint pack */
1995         bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
1996         src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
1997
1998         dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
1999         if (!dst_bitmap)
2000                 return -ENOMEM;
2001
2002         /* init SIT information */
2003         sit_i->s_ops = &default_salloc_ops;
2004
2005         sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
2006         sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
2007         sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
2008         sit_i->sit_bitmap = dst_bitmap;
2009         sit_i->bitmap_size = bitmap_size;
2010         sit_i->dirty_sentries = 0;
2011         sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
2012         sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
2013         sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
2014         mutex_init(&sit_i->sentry_lock);
2015         return 0;
2016 }
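
/*
 * Editor's note: this and the other build_* helpers return -ENOMEM
 * without unwinding partial allocations; the mount error path is
 * expected to call destroy_segment_manager(), whose destroy_*
 * counterparts below tolerate the half-built state.
 */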
2017
2018 static int build_free_segmap(struct f2fs_sb_info *sbi)
2019 {
2020         struct free_segmap_info *free_i;
2021         unsigned int bitmap_size, sec_bitmap_size;
2022
2023         /* allocate memory for free segmap information */
2024         free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
2025         if (!free_i)
2026                 return -ENOMEM;
2027
2028         SM_I(sbi)->free_info = free_i;
2029
2030         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
2031         free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
2032         if (!free_i->free_segmap)
2033                 return -ENOMEM;
2034
2035         sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
2036         free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
2037         if (!free_i->free_secmap)
2038                 return -ENOMEM;
2039
2040         /* set all segments as dirty temporarily */
2041         memset(free_i->free_segmap, 0xff, bitmap_size);
2042         memset(free_i->free_secmap, 0xff, sec_bitmap_size);
2043
2044         /* init free segmap information */
2045         free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
2046         free_i->free_segments = 0;
2047         free_i->free_sections = 0;
2048         spin_lock_init(&free_i->segmap_lock);
2049         return 0;
2050 }
2051
2052 static int build_curseg(struct f2fs_sb_info *sbi)
2053 {
2054         struct curseg_info *array;
2055         int i;
2056
2057         array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
2058         if (!array)
2059                 return -ENOMEM;
2060
2061         SM_I(sbi)->curseg_array = array;
2062
2063         for (i = 0; i < NR_CURSEG_TYPE; i++) {
2064                 mutex_init(&array[i].curseg_mutex);
2065                 array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
2066                 if (!array[i].sum_blk)
2067                         return -ENOMEM;
2068                 array[i].segno = NULL_SEGNO;
2069                 array[i].next_blkoff = 0;
2070         }
2071         return restore_curseg_summaries(sbi);
2072 }
2073
2074 static void build_sit_entries(struct f2fs_sb_info *sbi)
2075 {
2076         struct sit_info *sit_i = SIT_I(sbi);
2077         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
2078         struct f2fs_summary_block *sum = curseg->sum_blk;
2079         int sit_blk_cnt = SIT_BLK_CNT(sbi);
2080         unsigned int i, start, end;
2081         unsigned int readed, start_blk = 0;
2082         int nrpages = MAX_BIO_BLOCKS(sbi);
2083
2084         do {
2085                 readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
2086
2087                 start = start_blk * sit_i->sents_per_block;
2088                 end = (start_blk + readed) * sit_i->sents_per_block;
2089
2090                 for (; start < end && start < MAIN_SEGS(sbi); start++) {
2091                         struct seg_entry *se = &sit_i->sentries[start];
2092                         struct f2fs_sit_block *sit_blk;
2093                         struct f2fs_sit_entry sit;
2094                         struct page *page;
2095
2096                         mutex_lock(&curseg->curseg_mutex);
2097                         for (i = 0; i < sits_in_cursum(sum); i++) {
2098                                 if (le32_to_cpu(segno_in_journal(sum, i))
2099                                                                 == start) {
2100                                         sit = sit_in_journal(sum, i);
2101                                         mutex_unlock(&curseg->curseg_mutex);
2102                                         goto got_it;
2103                                 }
2104                         }
2105                         mutex_unlock(&curseg->curseg_mutex);
2106
2107                         page = get_current_sit_page(sbi, start);
2108                         sit_blk = (struct f2fs_sit_block *)page_address(page);
2109                         sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
2110                         f2fs_put_page(page, 1);
2111 got_it:
2112                         check_block_count(sbi, start, &sit);
2113                         seg_info_from_raw_sit(se, &sit);
2114
2115                         /* build the discard map only once, at mount time */
2116                         memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
2117                         sbi->discard_blks += sbi->blocks_per_seg - se->valid_blocks;
2118
2119                         if (sbi->segs_per_sec > 1) {
2120                                 struct sec_entry *e = get_sec_entry(sbi, start);
2121                                 e->valid_blocks += se->valid_blocks;
2122                         }
2123                 }
2124                 start_blk += readed;
2125         } while (start_blk < sit_blk_cnt);
2126 }
2127
2128 static void init_free_segmap(struct f2fs_sb_info *sbi)
2129 {
2130         unsigned int start;
2131         int type;
2132
2133         for (start = 0; start < MAIN_SEGS(sbi); start++) {
2134                 struct seg_entry *sentry = get_seg_entry(sbi, start);
2135                 if (!sentry->valid_blocks)
2136                         __set_free(sbi, start);
2137         }
2138
2139         /* mark the current segments as in-use */
2140         for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
2141                 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
2142                 __set_test_and_inuse(sbi, curseg_t->segno);
2143         }
2144 }
2145
2146 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
2147 {
2148         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2149         struct free_segmap_info *free_i = FREE_I(sbi);
2150         unsigned int segno = 0, offset = 0;
2151         unsigned short valid_blocks;
2152
2153         while (1) {
2154                 /* find dirty segment based on free segmap */
2155                 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
2156                 if (segno >= MAIN_SEGS(sbi))
2157                         break;
2158                 offset = segno + 1;
2159                 valid_blocks = get_valid_blocks(sbi, segno, 0);
2160                 if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
2161                         continue;
2162                 if (valid_blocks > sbi->blocks_per_seg) {
2163                         f2fs_bug_on(sbi, 1);
2164                         continue;
2165                 }
2166                 mutex_lock(&dirty_i->seglist_lock);
2167                 __locate_dirty_segment(sbi, segno, DIRTY);
2168                 mutex_unlock(&dirty_i->seglist_lock);
2169         }
2170 }
2171
2172 static int init_victim_secmap(struct f2fs_sb_info *sbi)
2173 {
2174         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2175         unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
2176
2177         dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL);
2178         if (!dirty_i->victim_secmap)
2179                 return -ENOMEM;
2180         return 0;
2181 }
2182
2183 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
2184 {
2185         struct dirty_seglist_info *dirty_i;
2186         unsigned int bitmap_size, i;
2187
2188         /* allocate memory for dirty segments list information */
2189         dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
2190         if (!dirty_i)
2191                 return -ENOMEM;
2192
2193         SM_I(sbi)->dirty_info = dirty_i;
2194         mutex_init(&dirty_i->seglist_lock);
2195
2196         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
2197
2198         for (i = 0; i < NR_DIRTY_TYPE; i++) {
2199                 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
2200                 if (!dirty_i->dirty_segmap[i])
2201                         return -ENOMEM;
2202         }
2203
2204         init_dirty_segmap(sbi);
2205         return init_victim_secmap(sbi);
2206 }
2207
2208 /*
2209  * Update min, max modified time for cost-benefit GC algorithm
2210  */
2211 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
2212 {
2213         struct sit_info *sit_i = SIT_I(sbi);
2214         unsigned int segno;
2215
2216         mutex_lock(&sit_i->sentry_lock);
2217
2218         sit_i->min_mtime = LLONG_MAX;
2219
2220         for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
2221                 unsigned int i;
2222                 unsigned long long mtime = 0;
2223
2224                 for (i = 0; i < sbi->segs_per_sec; i++)
2225                         mtime += get_seg_entry(sbi, segno + i)->mtime;
2226
2227                 mtime = div_u64(mtime, sbi->segs_per_sec);
2228
2229                 if (sit_i->min_mtime > mtime)
2230                         sit_i->min_mtime = mtime;
2231         }
2232         sit_i->max_mtime = get_mtime(sbi);
2233         mutex_unlock(&sit_i->sentry_lock);
2234 }
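
/*
 * Editor's note: min_mtime/max_mtime feed the cost-benefit victim
 * policy in gc.c, which (roughly) scales a candidate section's mtime
 * into [0, 100] between these bounds and prefers old, lightly
 * utilized sections.
 */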
2235
2236 int build_segment_manager(struct f2fs_sb_info *sbi)
2237 {
2238         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
2239         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2240         struct f2fs_sm_info *sm_info;
2241         int err;
2242
2243         sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
2244         if (!sm_info)
2245                 return -ENOMEM;
2246
2247         /* init sm info */
2248         sbi->sm_info = sm_info;
2249         sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
2250         sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
2251         sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
2252         sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
2253         sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
2254         sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
2255         sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
2256         sm_info->rec_prefree_segments = sm_info->main_segments *
2257                                         DEF_RECLAIM_PREFREE_SEGMENTS / 100;
2258         sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
2259         sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
2260         sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
2261
2262         INIT_LIST_HEAD(&sm_info->discard_list);
2263         sm_info->nr_discards = 0;
2264         sm_info->max_discards = 0;
2265
2266         sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
2267
2268         INIT_LIST_HEAD(&sm_info->sit_entry_set);
2269
2270         if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
2271                 err = create_flush_cmd_control(sbi);
2272                 if (err)
2273                         return err;
2274         }
2275
2276         err = build_sit_info(sbi);
2277         if (err)
2278                 return err;
2279         err = build_free_segmap(sbi);
2280         if (err)
2281                 return err;
2282         err = build_curseg(sbi);
2283         if (err)
2284                 return err;
2285
2286         /* reinit free segmap based on SIT */
2287         build_sit_entries(sbi);
2288
2289         init_free_segmap(sbi);
2290         err = build_dirty_segmap(sbi);
2291         if (err)
2292                 return err;
2293
2294         init_min_max_mtime(sbi);
2295         return 0;
2296 }
2297
2298 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
2299                 enum dirty_type dirty_type)
2300 {
2301         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2302
2303         mutex_lock(&dirty_i->seglist_lock);
2304         kfree(dirty_i->dirty_segmap[dirty_type]);
2305         dirty_i->nr_dirty[dirty_type] = 0;
2306         mutex_unlock(&dirty_i->seglist_lock);
2307 }
2308
2309 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
2310 {
2311         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2312         kfree(dirty_i->victim_secmap);
2313 }
2314
2315 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
2316 {
2317         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2318         int i;
2319
2320         if (!dirty_i)
2321                 return;
2322
2323         /* discard pre-free/dirty segments list */
2324         for (i = 0; i < NR_DIRTY_TYPE; i++)
2325                 discard_dirty_segmap(sbi, i);
2326
2327         destroy_victim_secmap(sbi);
2328         SM_I(sbi)->dirty_info = NULL;
2329         kfree(dirty_i);
2330 }
2331
2332 static void destroy_curseg(struct f2fs_sb_info *sbi)
2333 {
2334         struct curseg_info *array = SM_I(sbi)->curseg_array;
2335         int i;
2336
2337         if (!array)
2338                 return;
2339         SM_I(sbi)->curseg_array = NULL;
2340         for (i = 0; i < NR_CURSEG_TYPE; i++)
2341                 kfree(array[i].sum_blk);
2342         kfree(array);
2343 }
2344
2345 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
2346 {
2347         struct free_segmap_info *free_i = SM_I(sbi)->free_info;
2348         if (!free_i)
2349                 return;
2350         SM_I(sbi)->free_info = NULL;
2351         kfree(free_i->free_segmap);
2352         kfree(free_i->free_secmap);
2353         kfree(free_i);
2354 }
2355
2356 static void destroy_sit_info(struct f2fs_sb_info *sbi)
2357 {
2358         struct sit_info *sit_i = SIT_I(sbi);
2359         unsigned int start;
2360
2361         if (!sit_i)
2362                 return;
2363
2364         if (sit_i->sentries) {
2365                 for (start = 0; start < MAIN_SEGS(sbi); start++) {
2366                         kfree(sit_i->sentries[start].cur_valid_map);
2367                         kfree(sit_i->sentries[start].ckpt_valid_map);
2368                         kfree(sit_i->sentries[start].discard_map);
2369                 }
2370         }
2371         kfree(sit_i->tmp_map);
2372
2373         vfree(sit_i->sentries);
2374         vfree(sit_i->sec_entries);
2375         kfree(sit_i->dirty_sentries_bitmap);
2376
2377         SM_I(sbi)->sit_info = NULL;
2378         kfree(sit_i->sit_bitmap);
2379         kfree(sit_i);
2380 }
2381
2382 void destroy_segment_manager(struct f2fs_sb_info *sbi)
2383 {
2384         struct f2fs_sm_info *sm_info = SM_I(sbi);
2385
2386         if (!sm_info)
2387                 return;
2388         destroy_flush_cmd_control(sbi);
2389         destroy_dirty_segmap(sbi);
2390         destroy_curseg(sbi);
2391         destroy_free_segmap(sbi);
2392         destroy_sit_info(sbi);
2393         sbi->sm_info = NULL;
2394         kfree(sm_info);
2395 }
2396
2397 int __init create_segment_manager_caches(void)
2398 {
2399         discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
2400                         sizeof(struct discard_entry));
2401         if (!discard_entry_slab)
2402                 goto fail;
2403
2404         sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2405                         sizeof(struct sit_entry_set));
2406         if (!sit_entry_set_slab)
2407                 goto destroy_discard_entry;
2408
2409         inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
2410                         sizeof(struct inmem_pages));
2411         if (!inmem_entry_slab)
2412                 goto destroy_sit_entry_set;
2413         return 0;
2414
2415 destroy_sit_entry_set:
2416         kmem_cache_destroy(sit_entry_set_slab);
2417 destroy_discard_entry:
2418         kmem_cache_destroy(discard_entry_slab);
2419 fail:
2420         return -ENOMEM;
2421 }
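
/*
 * Editor's sketch (hypothetical module-init snippet, not from this
 * file): the slab caches above must exist before any mount can
 * allocate discard, sit-set, or in-memory page entries, so init
 * bails out early on failure, e.g.:
 *
 *      if (create_segment_manager_caches())
 *              return -ENOMEM;
 */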
2422
2423 void destroy_segment_manager_caches(void)
2424 {
2425         kmem_cache_destroy(sit_entry_set_slab);
2426         kmem_cache_destroy(discard_entry_slab);
2427         kmem_cache_destroy(inmem_entry_slab);
2428 }