/*
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define __reverse_ffz(x) __reverse_ffs(~(x))

static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *inmem_entry_slab;

/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
        int num = 0;

#if BITS_PER_LONG == 64
        if ((word & 0xffffffff) == 0) {
                num += 32;
                word >>= 32;
        }
#endif
        if ((word & 0xffff) == 0) {
                num += 16;
                word >>= 16;
        }
        if ((word & 0xff) == 0) {
                num += 8;
                word >>= 8;
        }
        if ((word & 0xf0) == 0)
                num += 4;
        else
                word >>= 4;
        if ((word & 0xc) == 0)
                num += 2;
        else
                word >>= 2;
        if ((word & 0x2) == 0)
                num += 1;
        return num;
}
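
/*
 * For illustration of the reversed in-byte bit order: within a byte,
 * f2fs bit 0 maps to the mask 0x80 and f2fs bit 4 to 0x08, so
 * __reverse_ffs(0x80) == 0 and __reverse_ffs(0x08) == 4.
 */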

/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * Example:
 *                             LSB <--> MSB
 *   f2fs_set_bit(0, bitmap) => 0000 0001
 *   f2fs_set_bit(7, bitmap) => 1000 0000
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
                        unsigned long size, unsigned long offset)
{
        /* bound the scan so we never test bits beyond @size */
        while (offset < size && !f2fs_test_bit(offset, (unsigned char *)addr))
                offset++;

        if (offset > size)
                offset = size;

        return offset;
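        /*
         * The word-at-a-time scan below is kept for reference only;
         * it is disabled in favor of the simple per-bit loop above.
         */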
#if 0
        const unsigned long *p = addr + BIT_WORD(offset);
        unsigned long result = offset & ~(BITS_PER_LONG - 1);
        unsigned long tmp;
        unsigned long mask, submask;
        unsigned long quot, rest;

        if (offset >= size)
                return size;

        size -= result;
        offset %= BITS_PER_LONG;
        if (!offset)
                goto aligned;

        tmp = *(p++);
        quot = (offset >> 3) << 3;
        rest = offset & 0x7;
        mask = ~0UL << quot;
        submask = (unsigned char)(0xff << rest) >> rest;
        submask <<= quot;
        mask &= submask;
        tmp &= mask;
        if (size < BITS_PER_LONG)
                goto found_first;
        if (tmp)
                goto found_middle;

        size -= BITS_PER_LONG;
        result += BITS_PER_LONG;
aligned:
        while (size & ~(BITS_PER_LONG-1)) {
                tmp = *(p++);
                if (tmp)
                        goto found_middle;
                result += BITS_PER_LONG;
                size -= BITS_PER_LONG;
        }
        if (!size)
                return result;
        tmp = *p;
found_first:
        tmp &= (~0UL >> (BITS_PER_LONG - size));
        if (tmp == 0UL)         /* Are any bits set? */
                return result + size;   /* Nope. */
found_middle:
        return result + __reverse_ffs(tmp);
#endif
}

static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
                        unsigned long size, unsigned long offset)
{
        /* bound the scan so we never test bits beyond @size */
        while (offset < size && f2fs_test_bit(offset, (unsigned char *)addr))
                offset++;

        if (offset > size)
                offset = size;

        return offset;
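        /* disabled reference implementation, as in __find_rev_next_bit */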
#if 0
        const unsigned long *p = addr + BIT_WORD(offset);
        unsigned long result = offset & ~(BITS_PER_LONG - 1);
        unsigned long tmp;
        unsigned long mask, submask;
        unsigned long quot, rest;

        if (offset >= size)
                return size;

        size -= result;
        offset %= BITS_PER_LONG;
        if (!offset)
                goto aligned;

        tmp = *(p++);
        quot = (offset >> 3) << 3;
        rest = offset & 0x7;
        mask = ~(~0UL << quot);
        submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
        submask <<= quot;
        mask += submask;
        tmp |= mask;
        if (size < BITS_PER_LONG)
                goto found_first;
        if (~tmp)
                goto found_middle;

        size -= BITS_PER_LONG;
        result += BITS_PER_LONG;
aligned:
        while (size & ~(BITS_PER_LONG - 1)) {
                tmp = *(p++);
                if (~tmp)
                        goto found_middle;
                result += BITS_PER_LONG;
                size -= BITS_PER_LONG;
        }
        if (!size)
                return result;
        tmp = *p;

found_first:
        tmp |= ~0UL << size;
        if (tmp == ~0UL)        /* Are any bits zero? */
                return result + size;   /* Nope. */
found_middle:
        return result + __reverse_ffz(tmp);
#endif
}

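/*
 * For illustration: if the first byte of a map is 0xa0 (f2fs bits 0 and
 * 2 set), __find_rev_next_bit(map, 8, 1) returns 2 and
 * __find_rev_next_zero_bit(map, 8, 0) returns 1.
 */

/*
 * Pages written through an atomic-write file are staged in memory
 * first: each page is pinned, indexed by page->index in fi->inmem_root
 * and linked on fi->inmem_pages, until commit_inmem_pages() either
 * writes the whole set back (commit) or drops it (abort on eviction).
 */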
void register_inmem_page(struct inode *inode, struct page *page)
{
        struct f2fs_inode_info *fi = F2FS_I(inode);
        struct inmem_pages *new;
        int err;

        SetPagePrivate(page);
        f2fs_trace_pid(page);

        new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);

        /* add atomic page indices to the list */
        new->page = page;
        INIT_LIST_HEAD(&new->list);
retry:
        /* increase reference count with clean state */
        mutex_lock(&fi->inmem_lock);
        err = radix_tree_insert(&fi->inmem_root, page->index, new);
        if (err == -EEXIST) {
                mutex_unlock(&fi->inmem_lock);
                kmem_cache_free(inmem_entry_slab, new);
                return;
        } else if (err) {
                mutex_unlock(&fi->inmem_lock);
                goto retry;
        }
        get_page(page);
        list_add_tail(&new->list, &fi->inmem_pages);
        inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
        mutex_unlock(&fi->inmem_lock);

        trace_f2fs_register_inmem_page(page, INMEM);
}

void commit_inmem_pages(struct inode *inode, bool abort)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
        struct inmem_pages *cur, *tmp;
        bool submit_bio = false;
        struct f2fs_io_info fio = {
                .sbi = sbi,
                .type = DATA,
                .rw = WRITE_SYNC | REQ_PRIO,
                .encrypted_page = NULL,
        };

        /*
         * abort is true only when f2fs_evict_inode is called. Since
         * f2fs_evict_inode doesn't produce any data writes, we don't
         * need to call f2fs_balance_fs in that case. Otherwise, f2fs_gc
         * in f2fs_balance_fs can wait forever until this inode becomes
         * free by iget_locked in f2fs_iget.
         */
        if (!abort) {
                f2fs_balance_fs(sbi);
                f2fs_lock_op(sbi);
        }

        mutex_lock(&fi->inmem_lock);
        list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
                if (!abort) {
                        lock_page(cur->page);
                        if (cur->page->mapping == inode->i_mapping) {
                                f2fs_wait_on_page_writeback(cur->page, DATA);
                                if (clear_page_dirty_for_io(cur->page))
                                        inode_dec_dirty_pages(inode);
                                trace_f2fs_commit_inmem_page(cur->page, INMEM);
                                fio.page = cur->page;
                                do_write_data_page(&fio);
                                submit_bio = true;
                        }
                        f2fs_put_page(cur->page, 1);
                } else {
                        trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
                        put_page(cur->page);
                }
                radix_tree_delete(&fi->inmem_root, cur->page->index);
                list_del(&cur->list);
                kmem_cache_free(inmem_entry_slab, cur);
                dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
        }
        mutex_unlock(&fi->inmem_lock);

        if (!abort) {
                f2fs_unlock_op(sbi);
                if (submit_bio)
                        f2fs_submit_merged_bio(sbi, DATA, WRITE);
        }
}

/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi)
{
        /*
         * We should do GC, or end up with a checkpoint, if there are
         * too many dirty dir/node pages without enough free segments.
         */
        if (has_not_enough_free_secs(sbi, 0)) {
                mutex_lock(&sbi->gc_mutex);
                f2fs_gc(sbi);
        }
}

void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
        /* try to shrink the extent cache when there is not enough memory */
        f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);

        /* check the # of cached NAT entries and prefree segments */
        if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
                        excess_prefree_segs(sbi) ||
                        !available_free_memory(sbi, INO_ENTRIES))
                f2fs_sync_fs(sbi->sb, true);
}

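/*
 * FLUSH_MERGE: rather than issuing one cache flush per caller,
 * f2fs_issue_flush() queues a flush_cmd on fcc->issue_list and sleeps on
 * a completion. This kthread drains the whole list, issues a single
 * WRITE_FLUSH bio for the batch, and hands the same result back to
 * every waiter, so concurrent fsync()s can share one device cache flush.
 */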
static int issue_flush_thread(void *data)
{
        struct f2fs_sb_info *sbi = data;
        struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
        wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
        if (kthread_should_stop())
                return 0;

        if (!llist_empty(&fcc->issue_list)) {
                struct bio *bio = bio_alloc(GFP_NOIO, 0);
                struct flush_cmd *cmd, *next;
                int ret;

                fcc->dispatch_list = llist_del_all(&fcc->issue_list);
                fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

                bio->bi_bdev = sbi->sb->s_bdev;
                ret = submit_bio_wait(WRITE_FLUSH, bio);

                llist_for_each_entry_safe(cmd, next,
                                          fcc->dispatch_list, llnode) {
                        cmd->ret = ret;
                        complete(&cmd->wait);
                }
                bio_put(bio);
                fcc->dispatch_list = NULL;
        }

        wait_event_interruptible(*q,
                kthread_should_stop() || !llist_empty(&fcc->issue_list));
        goto repeat;
}

int f2fs_issue_flush(struct f2fs_sb_info *sbi)
{
        struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
        struct flush_cmd cmd;

        trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
                                        test_opt(sbi, FLUSH_MERGE));

        if (test_opt(sbi, NOBARRIER))
                return 0;

        if (!test_opt(sbi, FLUSH_MERGE))
                return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);

        init_completion(&cmd.wait);

        llist_add(&cmd.llnode, &fcc->issue_list);

        if (!fcc->dispatch_list)
                wake_up(&fcc->flush_wait_queue);

        wait_for_completion(&cmd.wait);

        return cmd.ret;
}

int create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
        dev_t dev = sbi->sb->s_bdev->bd_dev;
        struct flush_cmd_control *fcc;
        int err = 0;

        fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
        if (!fcc)
                return -ENOMEM;
        init_waitqueue_head(&fcc->flush_wait_queue);
        init_llist_head(&fcc->issue_list);
        SM_I(sbi)->cmd_control_info = fcc;
        fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
                                "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
        if (IS_ERR(fcc->f2fs_issue_flush)) {
                err = PTR_ERR(fcc->f2fs_issue_flush);
                kfree(fcc);
                SM_I(sbi)->cmd_control_info = NULL;
                return err;
        }

        return err;
}

void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
{
        struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;

        if (fcc && fcc->f2fs_issue_flush)
                kthread_stop(fcc->f2fs_issue_flush);
        kfree(fcc);
        SM_I(sbi)->cmd_control_info = NULL;
}

static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
                enum dirty_type dirty_type)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

        /* need not be added */
        if (IS_CURSEG(sbi, segno))
                return;

        if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
                dirty_i->nr_dirty[dirty_type]++;

        if (dirty_type == DIRTY) {
                struct seg_entry *sentry = get_seg_entry(sbi, segno);
                enum dirty_type t = sentry->type;

                if (unlikely(t >= DIRTY)) {
                        f2fs_bug_on(sbi, 1);
                        return;
                }
                if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
                        dirty_i->nr_dirty[t]++;
        }
}

static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
                enum dirty_type dirty_type)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

        if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
                dirty_i->nr_dirty[dirty_type]--;

        if (dirty_type == DIRTY) {
                struct seg_entry *sentry = get_seg_entry(sbi, segno);
                enum dirty_type t = sentry->type;

                if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
                        dirty_i->nr_dirty[t]--;

                if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
                        clear_bit(GET_SECNO(sbi, segno),
                                                dirty_i->victim_secmap);
        }
}

/*
 * Errors such as -ENOMEM should not occur here, since adding a dirty
 * entry to the seglist is not a critical operation.
 * If a given segment is one of the current working segments, it won't be added.
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        unsigned short valid_blocks;

        if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
                return;

        mutex_lock(&dirty_i->seglist_lock);

        valid_blocks = get_valid_blocks(sbi, segno, 0);

        if (valid_blocks == 0) {
                __locate_dirty_segment(sbi, segno, PRE);
                __remove_dirty_segment(sbi, segno, DIRTY);
        } else if (valid_blocks < sbi->blocks_per_seg) {
                __locate_dirty_segment(sbi, segno, DIRTY);
        } else {
                /* Recovery routine with SSR needs this */
                __remove_dirty_segment(sbi, segno, DIRTY);
        }

        mutex_unlock(&dirty_i->seglist_lock);
}

static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
                                block_t blkstart, block_t blklen)
{
        sector_t start = SECTOR_FROM_BLOCK(blkstart);
        sector_t len = SECTOR_FROM_BLOCK(blklen);
        struct seg_entry *se;
        unsigned int offset;
        block_t i;

        for (i = blkstart; i < blkstart + blklen; i++) {
                se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
                offset = GET_BLKOFF_FROM_SEG0(sbi, i);

                if (!f2fs_test_and_set_bit(offset, se->discard_map))
                        sbi->discard_blks--;
        }
        trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
        return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
}

void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
{
        int err = -ENOTSUPP;

        if (test_opt(sbi, DISCARD)) {
                struct seg_entry *se = get_seg_entry(sbi,
                                GET_SEGNO(sbi, blkaddr));
                unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

                if (f2fs_test_bit(offset, se->discard_map))
                        return;

                err = f2fs_issue_discard(sbi, blkaddr, 1);
        }

        if (err) {
                struct page *page = grab_meta_page(sbi, blkaddr);
                /* zero-filled page */
                set_page_dirty(page);
                f2fs_put_page(page, 1);
        }
}

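/*
 * Queue [start, end) of the segment being trimmed as a small discard
 * extent, merging with the tail entry when the new range directly
 * follows it on disk.
 */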
static void __add_discard_entry(struct f2fs_sb_info *sbi,
                struct cp_control *cpc, struct seg_entry *se,
                unsigned int start, unsigned int end)
{
        struct list_head *head = &SM_I(sbi)->discard_list;
        struct discard_entry *new, *last;

        if (!list_empty(head)) {
                last = list_last_entry(head, struct discard_entry, list);
                if (START_BLOCK(sbi, cpc->trim_start) + start ==
                                                last->blkaddr + last->len) {
                        last->len += end - start;
                        goto done;
                }
        }

        new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
        INIT_LIST_HEAD(&new->list);
        new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
        new->len = end - start;
        list_add_tail(&new->list, head);
done:
        SM_I(sbi)->nr_discards += end - start;
        cpc->trimmed += end - start;
}

static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
        int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
        int max_blocks = sbi->blocks_per_seg;
        struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
        unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
        unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
        unsigned long *discard_map = (unsigned long *)se->discard_map;
        unsigned long *dmap = SIT_I(sbi)->tmp_map;
        unsigned int start = 0, end = -1;
        bool force = (cpc->reason == CP_DISCARD);
        int i;

        if (se->valid_blocks == max_blocks)
                return;

        if (!force) {
                if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
                        SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)
                        return;
        }

        /* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
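        /*
         * Candidate bits: under CP_DISCARD (force), blocks that are
         * invalid in the checkpointed bitmap and not yet discarded;
         * otherwise, blocks that were valid at the last checkpoint but
         * have been invalidated since (set in ckpt_map, clear in cur_map).
         */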
        for (i = 0; i < entries; i++)
                dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
                                (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];

        while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
                start = __find_rev_next_bit(dmap, max_blocks, end + 1);
                if (start >= max_blocks)
                        break;

                end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
                __add_discard_entry(sbi, cpc, se, start, end);
        }
}

void release_discard_addrs(struct f2fs_sb_info *sbi)
{
        struct list_head *head = &(SM_I(sbi)->discard_list);
        struct discard_entry *entry, *this;

        /* drop caches */
        list_for_each_entry_safe(entry, this, head, list) {
                list_del(&entry->list);
                kmem_cache_free(discard_entry_slab, entry);
        }
}

/*
 * Should call clear_prefree_segments after checkpoint is done.
 */
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        unsigned int segno;

        mutex_lock(&dirty_i->seglist_lock);
        for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
                __set_test_and_free(sbi, segno);
        mutex_unlock(&dirty_i->seglist_lock);
}

void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
        struct list_head *head = &(SM_I(sbi)->discard_list);
        struct discard_entry *entry, *this;
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
        unsigned int start = 0, end = -1;

        mutex_lock(&dirty_i->seglist_lock);

        while (1) {
                int i;
                start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
                if (start >= MAIN_SEGS(sbi))
                        break;
                end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
                                                                start + 1);

                for (i = start; i < end; i++)
                        clear_bit(i, prefree_map);

                dirty_i->nr_dirty[PRE] -= end - start;

                if (!test_opt(sbi, DISCARD))
                        continue;

                f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
                                (end - start) << sbi->log_blocks_per_seg);
        }
        mutex_unlock(&dirty_i->seglist_lock);

        /* send small discards */
        list_for_each_entry_safe(entry, this, head, list) {
                if (cpc->reason == CP_DISCARD && entry->len < cpc->trim_minlen)
                        goto skip;
                f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
skip:
                list_del(&entry->list);
                SM_I(sbi)->nr_discards -= entry->len;
                kmem_cache_free(discard_entry_slab, entry);
        }
}

static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
{
        struct sit_info *sit_i = SIT_I(sbi);

        if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
                sit_i->dirty_sentries++;
                return false;
        }

        return true;
}

static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
                                        unsigned int segno, int modified)
{
        struct seg_entry *se = get_seg_entry(sbi, segno);
        se->type = type;
        if (modified)
                __mark_sit_entry_dirty(sbi, segno);
}

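/*
 * Apply a signed block-count delta at blkaddr: del is +1 when a block
 * becomes valid (newly written) and -1 when it is invalidated. This
 * adjusts the segment's valid-block count, mtime and bitmaps, and tracks
 * ckpt_valid_blocks for blocks not valid in the last checkpoint.
 */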
static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
        struct seg_entry *se;
        unsigned int segno, offset;
        long int new_vblocks;

        segno = GET_SEGNO(sbi, blkaddr);

        se = get_seg_entry(sbi, segno);
        new_vblocks = se->valid_blocks + del;
        offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

        f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
                                (new_vblocks > sbi->blocks_per_seg)));

        se->valid_blocks = new_vblocks;
        se->mtime = get_mtime(sbi);
        SIT_I(sbi)->max_mtime = se->mtime;

        /* Update valid block bitmap */
        if (del > 0) {
                if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
                        f2fs_bug_on(sbi, 1);
                if (!f2fs_test_and_set_bit(offset, se->discard_map))
                        sbi->discard_blks--;
        } else {
                if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
                        f2fs_bug_on(sbi, 1);
                if (f2fs_test_and_clear_bit(offset, se->discard_map))
                        sbi->discard_blks++;
        }
        if (!f2fs_test_bit(offset, se->ckpt_valid_map))
                se->ckpt_valid_blocks += del;

        __mark_sit_entry_dirty(sbi, segno);

        /* update total number of valid blocks to be written in ckpt area */
        SIT_I(sbi)->written_valid_blocks += del;

        if (sbi->segs_per_sec > 1)
                get_sec_entry(sbi, segno)->valid_blocks += del;
}

void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
{
        update_sit_entry(sbi, new, 1);
        if (GET_SEGNO(sbi, old) != NULL_SEGNO)
                update_sit_entry(sbi, old, -1);

        locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
        locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
}

void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
{
        unsigned int segno = GET_SEGNO(sbi, addr);
        struct sit_info *sit_i = SIT_I(sbi);

        f2fs_bug_on(sbi, addr == NULL_ADDR);
        if (addr == NEW_ADDR)
                return;

        /* add it into sit main buffer */
        mutex_lock(&sit_i->sentry_lock);

        update_sit_entry(sbi, addr, -1);

        /* add it into dirty seglist */
        locate_dirty_segment(sbi, segno);

        mutex_unlock(&sit_i->sentry_lock);
}

/*
 * This function must be called with the curseg_mutex held.
 */
static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
                                        struct f2fs_summary *sum)
{
        struct curseg_info *curseg = CURSEG_I(sbi, type);
        void *addr = curseg->sum_blk;
        addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
        memcpy(addr, sum, sizeof(struct f2fs_summary));
}

/*
 * Calculate the number of current summary pages for writing
 */
int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
{
        int valid_sum_count = 0;
        int i, sum_in_page;

        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
                if (sbi->ckpt->alloc_type[i] == SSR)
                        valid_sum_count += sbi->blocks_per_seg;
                else {
                        if (for_ra)
                                valid_sum_count += le16_to_cpu(
                                        F2FS_CKPT(sbi)->cur_data_blkoff[i]);
                        else
                                valid_sum_count += curseg_blkoff(sbi, i);
                }
        }

        sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
                        SUM_FOOTER_SIZE) / SUMMARY_SIZE;
        if (valid_sum_count <= sum_in_page)
                return 1;
        else if ((valid_sum_count - sum_in_page) <=
                (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
                return 2;
        return 3;
}

/*
 * Caller should put this summary page
 */
struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
        return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
}

static void write_sum_page(struct f2fs_sb_info *sbi,
                        struct f2fs_summary_block *sum_blk, block_t blk_addr)
{
        struct page *page = grab_meta_page(sbi, blk_addr);
        void *kaddr = page_address(page);
        memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE);
        set_page_dirty(page);
        f2fs_put_page(page, 1);
}

static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
{
        struct curseg_info *curseg = CURSEG_I(sbi, type);
        unsigned int segno = curseg->segno + 1;
        struct free_segmap_info *free_i = FREE_I(sbi);

        if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
                return !test_bit(segno, free_i->free_segmap);
        return 0;
}

/*
 * Find a new segment from the free segment bitmap, in the right order.
 * This function must succeed; otherwise it is a BUG.
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
                        unsigned int *newseg, bool new_sec, int dir)
{
        struct free_segmap_info *free_i = FREE_I(sbi);
        unsigned int segno, secno, zoneno;
        unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
        unsigned int hint = *newseg / sbi->segs_per_sec;
        unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
        unsigned int left_start = hint;
        bool init = true;
        int go_left = 0;
        int i;

        spin_lock(&free_i->segmap_lock);

        if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
                segno = find_next_zero_bit(free_i->free_segmap,
                                        MAIN_SEGS(sbi), *newseg + 1);
                if (segno - *newseg < sbi->segs_per_sec -
                                        (*newseg % sbi->segs_per_sec))
                        goto got_it;
        }
find_other_zone:
        secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
        if (secno >= MAIN_SECS(sbi)) {
                if (dir == ALLOC_RIGHT) {
                        secno = find_next_zero_bit(free_i->free_secmap,
                                                        MAIN_SECS(sbi), 0);
                        f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
                } else {
                        go_left = 1;
                        left_start = hint - 1;
                }
        }
        if (go_left == 0)
                goto skip_left;

        while (test_bit(left_start, free_i->free_secmap)) {
                if (left_start > 0) {
                        left_start--;
                        continue;
                }
                left_start = find_next_zero_bit(free_i->free_secmap,
                                                        MAIN_SECS(sbi), 0);
                f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
                break;
        }
        secno = left_start;
skip_left:
        hint = secno;
        segno = secno * sbi->segs_per_sec;
        zoneno = secno / sbi->secs_per_zone;

        /* give up on finding another zone */
        if (!init)
                goto got_it;
        if (sbi->secs_per_zone == 1)
                goto got_it;
        if (zoneno == old_zoneno)
                goto got_it;
        if (dir == ALLOC_LEFT) {
                if (!go_left && zoneno + 1 >= total_zones)
                        goto got_it;
                if (go_left && zoneno == 0)
                        goto got_it;
        }
        for (i = 0; i < NR_CURSEG_TYPE; i++)
                if (CURSEG_I(sbi, i)->zone == zoneno)
                        break;

        if (i < NR_CURSEG_TYPE) {
                /* zone is in use, try another */
                if (go_left)
                        hint = zoneno * sbi->secs_per_zone - 1;
                else if (zoneno + 1 >= total_zones)
                        hint = 0;
                else
                        hint = (zoneno + 1) * sbi->secs_per_zone;
                init = false;
                goto find_other_zone;
        }
got_it:
        /* set it as dirty segment in free segmap */
        f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
        __set_inuse(sbi, segno);
        *newseg = segno;
        spin_unlock(&free_i->segmap_lock);
}

static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
{
        struct curseg_info *curseg = CURSEG_I(sbi, type);
        struct summary_footer *sum_footer;

        curseg->segno = curseg->next_segno;
        curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
        curseg->next_blkoff = 0;
        curseg->next_segno = NULL_SEGNO;

        sum_footer = &(curseg->sum_blk->footer);
        memset(sum_footer, 0, sizeof(struct summary_footer));
        if (IS_DATASEG(type))
                SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
        if (IS_NODESEG(type))
                SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
        __set_sit_entry_type(sbi, type, curseg->segno, modified);
}

/*
 * Allocate a current working segment.
 * This function always allocates a free segment in LFS manner.
 */
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
        struct curseg_info *curseg = CURSEG_I(sbi, type);
        unsigned int segno = curseg->segno;
        int dir = ALLOC_LEFT;

        write_sum_page(sbi, curseg->sum_blk,
                                GET_SUM_BLOCK(sbi, segno));
        if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
                dir = ALLOC_RIGHT;

        if (test_opt(sbi, NOHEAP))
                dir = ALLOC_RIGHT;

        get_new_segment(sbi, &segno, new_sec, dir);
        curseg->next_segno = segno;
        reset_curseg(sbi, type, 1);
        curseg->alloc_type = LFS;
}

static void __next_free_blkoff(struct f2fs_sb_info *sbi,
                        struct curseg_info *seg, block_t start)
{
        struct seg_entry *se = get_seg_entry(sbi, seg->segno);
        int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
        unsigned long *target_map = SIT_I(sbi)->tmp_map;
        unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
        unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
        int i, pos;

        for (i = 0; i < entries; i++)
                target_map[i] = ckpt_map[i] | cur_map[i];

        pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);

        seg->next_blkoff = pos;
}

/*
 * If a segment is written in LFS manner, the next block offset is just
 * obtained by increasing the current block offset. However, if a segment
 * is written in SSR manner, the next block offset is obtained by calling
 * __next_free_blkoff.
 */
static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
                                struct curseg_info *seg)
{
        if (seg->alloc_type == SSR)
                __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
        else
                seg->next_blkoff++;
}

/*
 * This function always allocates a used segment (from the dirty seglist)
 * in SSR manner, so it should recover the existing segment's valid block
 * information.
 */
static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, type);
        unsigned int new_segno = curseg->next_segno;
        struct f2fs_summary_block *sum_node;
        struct page *sum_page;

        write_sum_page(sbi, curseg->sum_blk,
                                GET_SUM_BLOCK(sbi, curseg->segno));
        __set_test_and_inuse(sbi, new_segno);

        mutex_lock(&dirty_i->seglist_lock);
        __remove_dirty_segment(sbi, new_segno, PRE);
        __remove_dirty_segment(sbi, new_segno, DIRTY);
        mutex_unlock(&dirty_i->seglist_lock);

        reset_curseg(sbi, type, 1);
        curseg->alloc_type = SSR;
        __next_free_blkoff(sbi, curseg, 0);

        if (reuse) {
                sum_page = get_sum_page(sbi, new_segno);
                sum_node = (struct f2fs_summary_block *)page_address(sum_page);
                memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
                f2fs_put_page(sum_page, 1);
        }
}

static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
{
        struct curseg_info *curseg = CURSEG_I(sbi, type);
        const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;

        if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
                return v_ops->get_victim(sbi,
                                &(curseg)->next_segno, BG_GC, type, SSR);

        /* For data segments, let's do SSR more intensively */
        for (; type >= CURSEG_HOT_DATA; type--)
                if (v_ops->get_victim(sbi, &(curseg)->next_segno,
                                                BG_GC, type, SSR))
                        return 1;
        return 0;
}

/*
 * Flush out the current segment and replace it with a new segment.
 * This function must succeed; otherwise it is a BUG.
 */
static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
                                                int type, bool force)
{
        struct curseg_info *curseg = CURSEG_I(sbi, type);

        if (force)
                new_curseg(sbi, type, true);
        else if (type == CURSEG_WARM_NODE)
                new_curseg(sbi, type, false);
        else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
                new_curseg(sbi, type, false);
        else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
                change_curseg(sbi, type, true);
        else
                new_curseg(sbi, type, false);

        stat_inc_seg_type(sbi, curseg);
}

static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type)
{
        struct curseg_info *curseg = CURSEG_I(sbi, type);
        unsigned int old_segno;

        old_segno = curseg->segno;
        SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
        locate_dirty_segment(sbi, old_segno);
}

void allocate_new_segments(struct f2fs_sb_info *sbi)
{
        int i;

        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
                __allocate_new_segments(sbi, i);
}

static const struct segment_allocation default_salloc_ops = {
        .allocate_segment = allocate_segment_by_default,
};

int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
        __u64 start = F2FS_BYTES_TO_BLK(range->start);
        __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
        unsigned int start_segno, end_segno;
        struct cp_control cpc;

        if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
                return -EINVAL;

        cpc.trimmed = 0;
        if (end <= MAIN_BLKADDR(sbi))
                goto out;

        /* start/end segment number in main_area */
        start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
        end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
                                                GET_SEGNO(sbi, end);
        cpc.reason = CP_DISCARD;
        cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));

        /* do checkpoint to issue discard commands safely */
        for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
                cpc.trim_start = start_segno;

                if (sbi->discard_blks == 0)
                        break;
                else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
                        cpc.trim_end = end_segno;
                else
                        cpc.trim_end = min_t(unsigned int,
                                rounddown(start_segno +
                                BATCHED_TRIM_SEGMENTS(sbi),
                                sbi->segs_per_sec) - 1, end_segno);

                mutex_lock(&sbi->gc_mutex);
                write_checkpoint(sbi, &cpc);
                mutex_unlock(&sbi->gc_mutex);
        }
out:
        range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
        return 0;
}

static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
{
        struct curseg_info *curseg = CURSEG_I(sbi, type);
        if (curseg->next_blkoff < sbi->blocks_per_seg)
                return true;
        return false;
}

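/*
 * Temperature policy for choosing a log: with six active logs, data
 * pages go to HOT (directory), COLD (cold data / cold files) or WARM
 * (everything else), and node pages go to HOT (direct, non-cold),
 * WARM (direct, cold) or COLD (indirect). With four or two logs, the
 * helpers below collapse these classes accordingly.
 */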
static int __get_segment_type_2(struct page *page, enum page_type p_type)
{
        if (p_type == DATA)
                return CURSEG_HOT_DATA;
        else
                return CURSEG_HOT_NODE;
}

static int __get_segment_type_4(struct page *page, enum page_type p_type)
{
        if (p_type == DATA) {
                struct inode *inode = page->mapping->host;

                if (S_ISDIR(inode->i_mode))
                        return CURSEG_HOT_DATA;
                else
                        return CURSEG_COLD_DATA;
        } else {
                if (IS_DNODE(page) && is_cold_node(page))
                        return CURSEG_WARM_NODE;
                else
                        return CURSEG_COLD_NODE;
        }
}

static int __get_segment_type_6(struct page *page, enum page_type p_type)
{
        if (p_type == DATA) {
                struct inode *inode = page->mapping->host;

                if (S_ISDIR(inode->i_mode))
                        return CURSEG_HOT_DATA;
                else if (is_cold_data(page) || file_is_cold(inode))
                        return CURSEG_COLD_DATA;
                else
                        return CURSEG_WARM_DATA;
        } else {
                if (IS_DNODE(page))
                        return is_cold_node(page) ? CURSEG_WARM_NODE :
                                                CURSEG_HOT_NODE;
                else
                        return CURSEG_COLD_NODE;
        }
}

static int __get_segment_type(struct page *page, enum page_type p_type)
{
        switch (F2FS_P_SB(page)->active_logs) {
        case 2:
                return __get_segment_type_2(page, p_type);
        case 4:
                return __get_segment_type_4(page, p_type);
        }
        /* NR_CURSEG_TYPE(6) logs by default */
        f2fs_bug_on(F2FS_P_SB(page),
                F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
        return __get_segment_type_6(page, p_type);
}

void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
                block_t old_blkaddr, block_t *new_blkaddr,
                struct f2fs_summary *sum, int type)
{
        struct sit_info *sit_i = SIT_I(sbi);
        struct curseg_info *curseg;
        bool direct_io = (type == CURSEG_DIRECT_IO);

        type = direct_io ? CURSEG_WARM_DATA : type;

        curseg = CURSEG_I(sbi, type);

        mutex_lock(&curseg->curseg_mutex);
        mutex_lock(&sit_i->sentry_lock);

        /* direct_io'ed data is aligned to the segment for better performance */
        if (direct_io && curseg->next_blkoff)
                __allocate_new_segments(sbi, type);

        *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

        /*
         * __add_sum_entry must be called with the curseg_mutex held,
         * because this function updates a summary entry in the
         * current summary block.
         */
        __add_sum_entry(sbi, type, sum);

        __refresh_next_blkoff(sbi, curseg);

        stat_inc_block_count(sbi, curseg);

        if (!__has_curseg_space(sbi, type))
                sit_i->s_ops->allocate_segment(sbi, type, false);
        /*
         * SIT information should be updated before segment allocation,
         * since SSR needs the latest valid block information.
         */
        refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);

        mutex_unlock(&sit_i->sentry_lock);

        if (page && IS_NODESEG(type))
                fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));

        mutex_unlock(&curseg->curseg_mutex);
}

static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
        int type = __get_segment_type(fio->page, fio->type);

        allocate_data_block(fio->sbi, fio->page, fio->blk_addr,
                                        &fio->blk_addr, sum, type);

        /* write out the dirty page to the bdev */
        f2fs_submit_page_mbio(fio);
}

void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
{
        struct f2fs_io_info fio = {
                .sbi = sbi,
                .type = META,
                .rw = WRITE_SYNC | REQ_META | REQ_PRIO,
                .blk_addr = page->index,
                .page = page,
                .encrypted_page = NULL,
        };

        set_page_writeback(page);
        f2fs_submit_page_mbio(&fio);
}

void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
{
        struct f2fs_summary sum;

        set_summary(&sum, nid, 0, 0);
        do_write_page(&sum, fio);
}

void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
{
        struct f2fs_sb_info *sbi = fio->sbi;
        struct f2fs_summary sum;
        struct node_info ni;

        f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
        get_node_info(sbi, dn->nid, &ni);
        set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
        do_write_page(&sum, fio);
        dn->data_blkaddr = fio->blk_addr;
}

void rewrite_data_page(struct f2fs_io_info *fio)
{
        stat_inc_inplace_blocks(fio->sbi);
        f2fs_submit_page_mbio(fio);
}

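/*
 * Relocate the summary for a block: credit new_blkaddr with @sum and
 * debit old_blkaddr, temporarily steering the matching current segment
 * to the target segment. When recover_curseg is set, the original
 * curseg position is restored afterwards (e.g. during recovery).
 */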
void f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                                block_t old_blkaddr, block_t new_blkaddr,
                                bool recover_curseg)
{
        struct sit_info *sit_i = SIT_I(sbi);
        struct curseg_info *curseg;
        unsigned int segno, old_cursegno;
        struct seg_entry *se;
        int type;
        unsigned short old_blkoff;

        segno = GET_SEGNO(sbi, new_blkaddr);
        se = get_seg_entry(sbi, segno);
        type = se->type;

        if (!recover_curseg) {
                /* for recovery flow */
                if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
                        if (old_blkaddr == NULL_ADDR)
                                type = CURSEG_COLD_DATA;
                        else
                                type = CURSEG_WARM_DATA;
                }
        } else {
                if (!IS_CURSEG(sbi, segno))
                        type = CURSEG_WARM_DATA;
        }

        curseg = CURSEG_I(sbi, type);

        mutex_lock(&curseg->curseg_mutex);
        mutex_lock(&sit_i->sentry_lock);

        old_cursegno = curseg->segno;
        old_blkoff = curseg->next_blkoff;

        /* change the current segment */
        if (segno != curseg->segno) {
                curseg->next_segno = segno;
                change_curseg(sbi, type, true);
        }

        curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
        __add_sum_entry(sbi, type, sum);

        refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
        locate_dirty_segment(sbi, old_cursegno);

        if (recover_curseg) {
                if (old_cursegno != curseg->segno) {
                        curseg->next_segno = old_cursegno;
                        change_curseg(sbi, type, true);
                }
                curseg->next_blkoff = old_blkoff;
        }

        mutex_unlock(&sit_i->sentry_lock);
        mutex_unlock(&curseg->curseg_mutex);
}

static inline bool is_merged_page(struct f2fs_sb_info *sbi,
                                        struct page *page, enum page_type type)
{
        enum page_type btype = PAGE_TYPE_OF_BIO(type);
        struct f2fs_bio_info *io = &sbi->write_io[btype];
        struct bio_vec *bvec;
        struct page *target;
        int i;

        down_read(&io->io_rwsem);
        if (!io->bio) {
                up_read(&io->io_rwsem);
                return false;
        }

        bio_for_each_segment_all(bvec, io->bio, i) {

                if (bvec->bv_page->mapping) {
                        target = bvec->bv_page;
                } else {
                        struct f2fs_crypto_ctx *ctx;

                        /* encrypted page */
                        ctx = (struct f2fs_crypto_ctx *)page_private(
                                                                bvec->bv_page);
                        target = ctx->control_page;
                }

                if (page == target) {
                        up_read(&io->io_rwsem);
                        return true;
                }
        }

        up_read(&io->io_rwsem);
        return false;
}

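/*
 * If the page is still sitting in a merged bio that has not been
 * submitted, force that bio out first; otherwise waiting for writeback
 * could block until some unrelated I/O happens to flush it.
 */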
1398 void f2fs_wait_on_page_writeback(struct page *page,
1399                                 enum page_type type)
1400 {
1401         if (PageWriteback(page)) {
1402                 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1403
1404                 if (is_merged_page(sbi, page, type))
1405                         f2fs_submit_merged_bio(sbi, type, WRITE);
1406                 wait_on_page_writeback(page);
1407         }
1408 }
1409
1410 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
1411 {
1412         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1413         struct curseg_info *seg_i;
1414         unsigned char *kaddr;
1415         struct page *page;
1416         block_t start;
1417         int i, j, offset;
1418
1419         start = start_sum_block(sbi);
1420
1421         page = get_meta_page(sbi, start++);
1422         kaddr = (unsigned char *)page_address(page);
1423
1424         /* Step 1: restore nat cache */
1425         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1426         memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);
1427
1428         /* Step 2: restore sit cache */
1429         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1430         memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
1431                                                 SUM_JOURNAL_SIZE);
1432         offset = 2 * SUM_JOURNAL_SIZE;
1433
1434         /* Step 3: restore summary entries */
1435         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1436                 unsigned short blk_off;
1437                 unsigned int segno;
1438
1439                 seg_i = CURSEG_I(sbi, i);
1440                 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
1441                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
1442                 seg_i->next_segno = segno;
1443                 reset_curseg(sbi, i, 0);
1444                 seg_i->alloc_type = ckpt->alloc_type[i];
1445                 seg_i->next_blkoff = blk_off;
1446
1447                 if (seg_i->alloc_type == SSR)
1448                         blk_off = sbi->blocks_per_seg;
1449
1450                 for (j = 0; j < blk_off; j++) {
1451                         struct f2fs_summary *s;
1452                         s = (struct f2fs_summary *)(kaddr + offset);
1453                         seg_i->sum_blk->entries[j] = *s;
1454                         offset += SUMMARY_SIZE;
1455                         if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1456                                                 SUM_FOOTER_SIZE)
1457                                 continue;
1458
1459                         f2fs_put_page(page, 1);
1460                         page = NULL;
1461
1462                         page = get_meta_page(sbi, start++);
1463                         kaddr = (unsigned char *)page_address(page);
1464                         offset = 0;
1465                 }
1466         }
1467         f2fs_put_page(page, 1);
1468         return 0;
1469 }
1470
1471 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1472 {
1473         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1474         struct f2fs_summary_block *sum;
1475         struct curseg_info *curseg;
1476         struct page *new;
1477         unsigned short blk_off;
1478         unsigned int segno = 0;
1479         block_t blk_addr = 0;
1480
1481         /* get segment number and block addr */
1482         if (IS_DATASEG(type)) {
1483                 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
1484                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
1485                                                         CURSEG_HOT_DATA]);
1486                 if (__exist_node_summaries(sbi))
1487                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
1488                 else
1489                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
1490         } else {
1491                 segno = le32_to_cpu(ckpt->cur_node_segno[type -
1492                                                         CURSEG_HOT_NODE]);
1493                 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
1494                                                         CURSEG_HOT_NODE]);
1495                 if (__exist_node_summaries(sbi))
1496                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
1497                                                         type - CURSEG_HOT_NODE);
1498                 else
1499                         blk_addr = GET_SUM_BLOCK(sbi, segno);
1500         }
1501
1502         new = get_meta_page(sbi, blk_addr);
1503         sum = (struct f2fs_summary_block *)page_address(new);
1504
1505         if (IS_NODESEG(type)) {
1506                 if (__exist_node_summaries(sbi)) {
1507                         struct f2fs_summary *ns = &sum->entries[0];
1508                         int i;
1509                         for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
1510                                 ns->version = 0;
1511                                 ns->ofs_in_node = 0;
1512                         }
1513                 } else {
1514                         int err;
1515
1516                         err = restore_node_summary(sbi, segno, sum);
1517                         if (err) {
1518                                 f2fs_put_page(new, 1);
1519                                 return err;
1520                         }
1521                 }
1522         }
1523
1524         /* install the partially written segment as the current segment */
1525         curseg = CURSEG_I(sbi, type);
1526         mutex_lock(&curseg->curseg_mutex);
1527         memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
1528         curseg->next_segno = segno;
1529         reset_curseg(sbi, type, 0);
1530         curseg->alloc_type = ckpt->alloc_type[type];
1531         curseg->next_blkoff = blk_off;
1532         mutex_unlock(&curseg->curseg_mutex);
1533         f2fs_put_page(new, 1);
1534         return 0;
1535 }
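
/*
 * Editor's note: in the IS_NODESEG() branch above, when the checkpoint
 * carries node summaries their version/ofs_in_node fields are cleared,
 * since only the nid is meaningful for a node block; when it does not,
 * restore_node_summary() rebuilds the entries by reading the node
 * segment itself.
 */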
1536
1537 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1538 {
1539         int type = CURSEG_HOT_DATA;
1540         int err;
1541
1542         if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1543                 int npages = npages_for_summary_flush(sbi, true);
1544
1545                 if (npages >= 2)
1546                         ra_meta_pages(sbi, start_sum_block(sbi), npages,
1547                                                                 META_CP);
1548
1549                 /* restore the compacted data summaries */
1550                 if (read_compacted_summaries(sbi))
1551                         return -EINVAL;
1552                 type = CURSEG_HOT_NODE;
1553         }
1554
1555         if (__exist_node_summaries(sbi))
1556                 ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
1557                                         NR_CURSEG_TYPE - type, META_CP);
1558
1559         for (; type <= CURSEG_COLD_NODE; type++) {
1560                 err = read_normal_summaries(sbi, type);
1561                 if (err)
1562                         return err;
1563         }
1564
1565         return 0;
1566 }
1567
1568 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
1569 {
1570         struct page *page;
1571         unsigned char *kaddr;
1572         struct f2fs_summary *summary;
1573         struct curseg_info *seg_i;
1574         int written_size = 0;
1575         int i, j;
1576
1577         page = grab_meta_page(sbi, blkaddr++);
1578         kaddr = (unsigned char *)page_address(page);
1579
1580         /* Step 1: write nat cache */
1581         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1582         memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
1583         written_size += SUM_JOURNAL_SIZE;
1584
1585         /* Step 2: write sit cache */
1586         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1587         memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
1588                                                 SUM_JOURNAL_SIZE);
1589         written_size += SUM_JOURNAL_SIZE;
1590
1591         /* Step 3: write summary entries */
1592         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1593                 unsigned short blkoff;
1594                 seg_i = CURSEG_I(sbi, i);
1595                 if (sbi->ckpt->alloc_type[i] == SSR)
1596                         blkoff = sbi->blocks_per_seg;
1597                 else
1598                         blkoff = curseg_blkoff(sbi, i);
1599
1600                 for (j = 0; j < blkoff; j++) {
1601                         if (!page) {
1602                                 page = grab_meta_page(sbi, blkaddr++);
1603                                 kaddr = (unsigned char *)page_address(page);
1604                                 written_size = 0;
1605                         }
1606                         summary = (struct f2fs_summary *)(kaddr + written_size);
1607                         *summary = seg_i->sum_blk->entries[j];
1608                         written_size += SUMMARY_SIZE;
1609
1610                         if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1611                                                         SUM_FOOTER_SIZE)
1612                                 continue;
1613
1614                         set_page_dirty(page);
1615                         f2fs_put_page(page, 1);
1616                         page = NULL;
1617                 }
1618         }
1619         if (page) {
1620                 set_page_dirty(page);
1621                 f2fs_put_page(page, 1);
1622         }
1623 }
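
/*
 * Editor's note: this writer mirrors read_compacted_summaries() exactly:
 * journals first, then densely packed entries, with the trailing
 * SUM_FOOTER_SIZE bytes of every page left unused (see the layout
 * sketch after read_compacted_summaries()).
 */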
1624
1625 static void write_normal_summaries(struct f2fs_sb_info *sbi,
1626                                         block_t blkaddr, int type)
1627 {
1628         int i, end;
1629         if (IS_DATASEG(type))
1630                 end = type + NR_CURSEG_DATA_TYPE;
1631         else
1632                 end = type + NR_CURSEG_NODE_TYPE;
1633
1634         for (i = type; i < end; i++) {
1635                 struct curseg_info *sum = CURSEG_I(sbi, i);
1636                 mutex_lock(&sum->curseg_mutex);
1637                 write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
1638                 mutex_unlock(&sum->curseg_mutex);
1639         }
1640 }
1641
1642 void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1643 {
1644         if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
1645                 write_compacted_summaries(sbi, start_blk);
1646         else
1647                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
1648 }
1649
1650 void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1651 {
1652         write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
1653 }
1654
1655 int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
1656                                         unsigned int val, int alloc)
1657 {
1658         int i;
1659
1660         if (type == NAT_JOURNAL) {
1661                 for (i = 0; i < nats_in_cursum(sum); i++) {
1662                         if (le32_to_cpu(nid_in_journal(sum, i)) == val)
1663                                 return i;
1664                 }
1665                 if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
1666                         return update_nats_in_cursum(sum, 1);
1667         } else if (type == SIT_JOURNAL) {
1668                 for (i = 0; i < sits_in_cursum(sum); i++)
1669                         if (le32_to_cpu(segno_in_journal(sum, i)) == val)
1670                                 return i;
1671                 if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
1672                         return update_sits_in_cursum(sum, 1);
1673         }
1674         return -1;
1675 }
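
/*
 * Editor's sketch, a hypothetical helper not present in the original
 * source: the read-side pattern for lookup_journal_in_cursum(),
 * mirroring how build_sit_entries() below probes the SIT journal before
 * falling back to the on-disk SIT block.  The caller is expected to
 * hold curseg->curseg_mutex.
 */
static inline bool sit_entry_from_journal(struct f2fs_summary_block *sum,
                        unsigned int segno, struct f2fs_sit_entry *sit)
{
        /* alloc == 0: probe only, never consume a journal slot */
        int i = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 0);

        if (i < 0)
                return false;
        *sit = sit_in_journal(sum, i);
        return true;
}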
1676
1677 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
1678                                         unsigned int segno)
1679 {
1680         return get_meta_page(sbi, current_sit_addr(sbi, segno));
1681 }
1682
1683 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1684                                         unsigned int start)
1685 {
1686         struct sit_info *sit_i = SIT_I(sbi);
1687         struct page *src_page, *dst_page;
1688         pgoff_t src_off, dst_off;
1689         void *src_addr, *dst_addr;
1690
1691         src_off = current_sit_addr(sbi, start);
1692         dst_off = next_sit_addr(sbi, src_off);
1693
1694         /* get current sit block page without lock */
1695         src_page = get_meta_page(sbi, src_off);
1696         dst_page = grab_meta_page(sbi, dst_off);
1697         f2fs_bug_on(sbi, PageDirty(src_page));
1698
1699         src_addr = page_address(src_page);
1700         dst_addr = page_address(dst_page);
1701         memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
1702
1703         set_page_dirty(dst_page);
1704         f2fs_put_page(src_page, 1);
1705
1706         set_to_next_sit(sit_i, start);
1707
1708         return dst_page;
1709 }
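
/*
 * Editor's note: SIT blocks are double-buffered.  next_sit_addr() maps
 * src_off to its twin in the other SIT pack, and set_to_next_sit()
 * records which copy is live, so an interrupted checkpoint still leaves
 * the previously valid pack intact.
 */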
1710
1711 static struct sit_entry_set *grab_sit_entry_set(void)
1712 {
1713         struct sit_entry_set *ses =
1714                         f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC);
1715
1716         ses->entry_cnt = 0;
1717         INIT_LIST_HEAD(&ses->set_list);
1718         return ses;
1719 }
1720
1721 static void release_sit_entry_set(struct sit_entry_set *ses)
1722 {
1723         list_del(&ses->set_list);
1724         kmem_cache_free(sit_entry_set_slab, ses);
1725 }
1726
1727 static void adjust_sit_entry_set(struct sit_entry_set *ses,
1728                                                 struct list_head *head)
1729 {
1730         struct sit_entry_set *next = ses;
1731
1732         if (list_is_last(&ses->set_list, head))
1733                 return;
1734
1735         list_for_each_entry_continue(next, head, set_list)
1736                 if (ses->entry_cnt <= next->entry_cnt)
1737                         break;
1738
1739         list_move_tail(&ses->set_list, &next->set_list);
1740 }
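
/*
 * Editor's note: adjust_sit_entry_set() keeps the list sorted by
 * ascending entry_cnt.  flush_sit_entries() walks it in order, so the
 * smallest sets are matched against the remaining journal space first
 * and its to_journal flag flips to false at most once.
 */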
1741
1742 static void add_sit_entry(unsigned int segno, struct list_head *head)
1743 {
1744         struct sit_entry_set *ses;
1745         unsigned int start_segno = START_SEGNO(segno);
1746
1747         list_for_each_entry(ses, head, set_list) {
1748                 if (ses->start_segno == start_segno) {
1749                         ses->entry_cnt++;
1750                         adjust_sit_entry_set(ses, head);
1751                         return;
1752                 }
1753         }
1754
1755         ses = grab_sit_entry_set();
1756
1757         ses->start_segno = start_segno;
1758         ses->entry_cnt++;
1759         list_add(&ses->set_list, head);
1760 }
1761
1762 static void add_sits_in_set(struct f2fs_sb_info *sbi)
1763 {
1764         struct f2fs_sm_info *sm_info = SM_I(sbi);
1765         struct list_head *set_list = &sm_info->sit_entry_set;
1766         unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
1767         unsigned int segno;
1768
1769         for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
1770                 add_sit_entry(segno, set_list);
1771 }
1772
1773 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
1774 {
1775         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1776         struct f2fs_summary_block *sum = curseg->sum_blk;
1777         int i;
1778
1779         for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
1780                 unsigned int segno;
1781                 bool dirtied;
1782
1783                 segno = le32_to_cpu(segno_in_journal(sum, i));
1784                 dirtied = __mark_sit_entry_dirty(sbi, segno);
1785
1786                 if (!dirtied)
1787                         add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
1788         }
1789         update_sits_in_cursum(sum, -sits_in_cursum(sum));
1790 }
1791
1792 /*
1793  * The checkpoint path calls this function to flush SIT entries, including
1794  * the sit journal, and to move prefree segments to free segments.
1795  */
1796 void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1797 {
1798         struct sit_info *sit_i = SIT_I(sbi);
1799         unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
1800         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1801         struct f2fs_summary_block *sum = curseg->sum_blk;
1802         struct sit_entry_set *ses, *tmp;
1803         struct list_head *head = &SM_I(sbi)->sit_entry_set;
1804         bool to_journal = true;
1805         struct seg_entry *se;
1806
1807         mutex_lock(&curseg->curseg_mutex);
1808         mutex_lock(&sit_i->sentry_lock);
1809
1810         if (!sit_i->dirty_sentries)
1811                 goto out;
1812
1813         /*
1814          * collect the dirty sit entries from the dirty bitmap into
1815          * temporary sit entry sets, grouped per sit block
1816          */
1817         add_sits_in_set(sbi);
1818
1819         /*
1820          * if there is not enough space in the journal to store all dirty
1821          * sit entries, remove the existing journal entries and account
1822          * them in the sit entry sets instead.
1823          */
1824         if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
1825                 remove_sits_in_journal(sbi);
1826
1827         /*
1828          * there are two steps to flush sit entries:
1829          * #1, flush sit entries to journal in current cold data summary block.
1830          * #2, flush sit entries to sit page.
1831          */
1832         list_for_each_entry_safe(ses, tmp, head, set_list) {
1833                 struct page *page = NULL;
1834                 struct f2fs_sit_block *raw_sit = NULL;
1835                 unsigned int start_segno = ses->start_segno;
1836                 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
1837                                                 (unsigned long)MAIN_SEGS(sbi));
1838                 unsigned int segno = start_segno;
1839
1840                 if (to_journal &&
1841                         !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL))
1842                         to_journal = false;
1843
1844                 if (!to_journal) {
1845                         page = get_next_sit_page(sbi, start_segno);
1846                         raw_sit = page_address(page);
1847                 }
1848
1849                 /* flush dirty sit entries in region of current sit set */
1850                 for_each_set_bit_from(segno, bitmap, end) {
1851                         int offset, sit_offset;
1852
1853                         se = get_seg_entry(sbi, segno);
1854
1855                         /* add discard candidates */
1856                         if (cpc->reason != CP_DISCARD) {
1857                                 cpc->trim_start = segno;
1858                                 add_discard_addrs(sbi, cpc);
1859                         }
1860
1861                         if (to_journal) {
1862                                 offset = lookup_journal_in_cursum(sum,
1863                                                         SIT_JOURNAL, segno, 1);
1864                                 f2fs_bug_on(sbi, offset < 0);
1865                                 segno_in_journal(sum, offset) =
1866                                                         cpu_to_le32(segno);
1867                                 seg_info_to_raw_sit(se,
1868                                                 &sit_in_journal(sum, offset));
1869                         } else {
1870                                 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1871                                 seg_info_to_raw_sit(se,
1872                                                 &raw_sit->entries[sit_offset]);
1873                         }
1874
1875                         __clear_bit(segno, bitmap);
1876                         sit_i->dirty_sentries--;
1877                         ses->entry_cnt--;
1878                 }
1879
1880                 if (!to_journal)
1881                         f2fs_put_page(page, 1);
1882
1883                 f2fs_bug_on(sbi, ses->entry_cnt);
1884                 release_sit_entry_set(ses);
1885         }
1886
1887         f2fs_bug_on(sbi, !list_empty(head));
1888         f2fs_bug_on(sbi, sit_i->dirty_sentries);
1889 out:
1890         if (cpc->reason == CP_DISCARD) {
1891                 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
1892                         add_discard_addrs(sbi, cpc);
1893         }
1894         mutex_unlock(&sit_i->sentry_lock);
1895         mutex_unlock(&curseg->curseg_mutex);
1896
1897         set_prefree_as_free_segments(sbi);
1898 }
1899
1900 static int build_sit_info(struct f2fs_sb_info *sbi)
1901 {
1902         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1903         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1904         struct sit_info *sit_i;
1905         unsigned int sit_segs, start;
1906         char *src_bitmap, *dst_bitmap;
1907         unsigned int bitmap_size;
1908
1909         /* allocate memory for SIT information */
1910         sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
1911         if (!sit_i)
1912                 return -ENOMEM;
1913
1914         SM_I(sbi)->sit_info = sit_i;
1915
1916         sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry));
1917         if (!sit_i->sentries)
1918                 return -ENOMEM;
1919
1920         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1921         sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1922         if (!sit_i->dirty_sentries_bitmap)
1923                 return -ENOMEM;
1924
1925         for (start = 0; start < MAIN_SEGS(sbi); start++) {
1926                 sit_i->sentries[start].cur_valid_map
1927                         = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1928                 sit_i->sentries[start].ckpt_valid_map
1929                         = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1930                 sit_i->sentries[start].discard_map
1931                         = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1932                 if (!sit_i->sentries[start].cur_valid_map ||
1933                                 !sit_i->sentries[start].ckpt_valid_map ||
1934                                 !sit_i->sentries[start].discard_map)
1935                         return -ENOMEM;
1936         }
1937
1938         sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1939         if (!sit_i->tmp_map)
1940                 return -ENOMEM;
1941
1942         if (sbi->segs_per_sec > 1) {
1943                 sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) *
1944                                         sizeof(struct sec_entry));
1945                 if (!sit_i->sec_entries)
1946                         return -ENOMEM;
1947         }
1948
1949         /* get information related to SIT */
1950         sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
1951
1952         /* set up the SIT bitmap from the checkpoint pack */
1953         bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
1954         src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
1955
1956         dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
1957         if (!dst_bitmap)
1958                 return -ENOMEM;
1959
1960         /* init SIT information */
1961         sit_i->s_ops = &default_salloc_ops;
1962
1963         sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
1964         sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
1965         sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
1966         sit_i->sit_bitmap = dst_bitmap;
1967         sit_i->bitmap_size = bitmap_size;
1968         sit_i->dirty_sentries = 0;
1969         sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
1970         sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
1971         sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
1972         mutex_init(&sit_i->sentry_lock);
1973         return 0;
1974 }
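
/*
 * Editor's note: the SIT bitmap is kmemdup()'d from the checkpoint so
 * that set_to_next_sit() can toggle pack bits in memory without
 * modifying the checkpoint's own copy before it is rewritten.
 */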
1975
1976 static int build_free_segmap(struct f2fs_sb_info *sbi)
1977 {
1978         struct free_segmap_info *free_i;
1979         unsigned int bitmap_size, sec_bitmap_size;
1980
1981         /* allocate memory for free segmap information */
1982         free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
1983         if (!free_i)
1984                 return -ENOMEM;
1985
1986         SM_I(sbi)->free_info = free_i;
1987
1988         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1989         free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
1990         if (!free_i->free_segmap)
1991                 return -ENOMEM;
1992
1993         sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
1994         free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
1995         if (!free_i->free_secmap)
1996                 return -ENOMEM;
1997
1998         /* temporarily mark all segments as used; init_free_segmap() frees them */
1999         memset(free_i->free_segmap, 0xff, bitmap_size);
2000         memset(free_i->free_secmap, 0xff, sec_bitmap_size);
2001
2002         /* init free segmap information */
2003         free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
2004         free_i->free_segments = 0;
2005         free_i->free_sections = 0;
2006         spin_lock_init(&free_i->segmap_lock);
2007         return 0;
2008 }
2009
2010 static int build_curseg(struct f2fs_sb_info *sbi)
2011 {
2012         struct curseg_info *array;
2013         int i;
2014
2015         array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
2016         if (!array)
2017                 return -ENOMEM;
2018
2019         SM_I(sbi)->curseg_array = array;
2020
2021         for (i = 0; i < NR_CURSEG_TYPE; i++) {
2022                 mutex_init(&array[i].curseg_mutex);
2023                 array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
2024                 if (!array[i].sum_blk)
2025                         return -ENOMEM;
2026                 array[i].segno = NULL_SEGNO;
2027                 array[i].next_blkoff = 0;
2028         }
2029         return restore_curseg_summaries(sbi);
2030 }
2031
2032 static void build_sit_entries(struct f2fs_sb_info *sbi)
2033 {
2034         struct sit_info *sit_i = SIT_I(sbi);
2035         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
2036         struct f2fs_summary_block *sum = curseg->sum_blk;
2037         int sit_blk_cnt = SIT_BLK_CNT(sbi);
2038         unsigned int i, start, end;
2039         unsigned int readed, start_blk = 0;
2040         int nrpages = MAX_BIO_BLOCKS(sbi);
2041
2042         do {
2043                 readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
2044
2045                 start = start_blk * sit_i->sents_per_block;
2046                 end = (start_blk + readed) * sit_i->sents_per_block;
2047
2048                 for (; start < end && start < MAIN_SEGS(sbi); start++) {
2049                         struct seg_entry *se = &sit_i->sentries[start];
2050                         struct f2fs_sit_block *sit_blk;
2051                         struct f2fs_sit_entry sit;
2052                         struct page *page;
2053
2054                         mutex_lock(&curseg->curseg_mutex);
2055                         for (i = 0; i < sits_in_cursum(sum); i++) {
2056                                 if (le32_to_cpu(segno_in_journal(sum, i))
2057                                                                 == start) {
2058                                         sit = sit_in_journal(sum, i);
2059                                         mutex_unlock(&curseg->curseg_mutex);
2060                                         goto got_it;
2061                                 }
2062                         }
2063                         mutex_unlock(&curseg->curseg_mutex);
2064
2065                         page = get_current_sit_page(sbi, start);
2066                         sit_blk = (struct f2fs_sit_block *)page_address(page);
2067                         sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
2068                         f2fs_put_page(page, 1);
2069 got_it:
2070                         check_block_count(sbi, start, &sit);
2071                         seg_info_from_raw_sit(se, &sit);
2072
2073                         /* build the discard map only once, at mount time */
2074                         memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
2075                         sbi->discard_blks += sbi->blocks_per_seg - se->valid_blocks;
2076
2077                         if (sbi->segs_per_sec > 1) {
2078                                 struct sec_entry *e = get_sec_entry(sbi, start);
2079                                 e->valid_blocks += se->valid_blocks;
2080                         }
2081                 }
2082                 start_blk += readed;
2083         } while (start_blk < sit_blk_cnt);
2084 }
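
/*
 * Editor's note: an entry cached in the cold data summary journal is
 * always newer than the on-disk SIT block, which is why the loop above
 * consults the journal before reading the SIT page.
 */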
2085
2086 static void init_free_segmap(struct f2fs_sb_info *sbi)
2087 {
2088         unsigned int start;
2089         int type;
2090
2091         for (start = 0; start < MAIN_SEGS(sbi); start++) {
2092                 struct seg_entry *sentry = get_seg_entry(sbi, start);
2093                 if (!sentry->valid_blocks)
2094                         __set_free(sbi, start);
2095         }
2096
2097         /* mark the current segments as in use */
2098         for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
2099                 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
2100                 __set_test_and_inuse(sbi, curseg_t->segno);
2101         }
2102 }
2103
2104 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
2105 {
2106         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2107         struct free_segmap_info *free_i = FREE_I(sbi);
2108         unsigned int segno = 0, offset = 0;
2109         unsigned short valid_blocks;
2110
2111         while (1) {
2112                 /* find dirty segment based on free segmap */
2113                 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
2114                 if (segno >= MAIN_SEGS(sbi))
2115                         break;
2116                 offset = segno + 1;
2117                 valid_blocks = get_valid_blocks(sbi, segno, 0);
2118                 if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
2119                         continue;
2120                 if (valid_blocks > sbi->blocks_per_seg) {
2121                         f2fs_bug_on(sbi, 1);
2122                         continue;
2123                 }
2124                 mutex_lock(&dirty_i->seglist_lock);
2125                 __locate_dirty_segment(sbi, segno, DIRTY);
2126                 mutex_unlock(&dirty_i->seglist_lock);
2127         }
2128 }
2129
2130 static int init_victim_secmap(struct f2fs_sb_info *sbi)
2131 {
2132         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2133         unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
2134
2135         dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL);
2136         if (!dirty_i->victim_secmap)
2137                 return -ENOMEM;
2138         return 0;
2139 }
2140
2141 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
2142 {
2143         struct dirty_seglist_info *dirty_i;
2144         unsigned int bitmap_size, i;
2145
2146         /* allocate memory for dirty segments list information */
2147         dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
2148         if (!dirty_i)
2149                 return -ENOMEM;
2150
2151         SM_I(sbi)->dirty_info = dirty_i;
2152         mutex_init(&dirty_i->seglist_lock);
2153
2154         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
2155
2156         for (i = 0; i < NR_DIRTY_TYPE; i++) {
2157                 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
2158                 if (!dirty_i->dirty_segmap[i])
2159                         return -ENOMEM;
2160         }
2161
2162         init_dirty_segmap(sbi);
2163         return init_victim_secmap(sbi);
2164 }
2165
2166 /*
2167  * Update min, max modified time for cost-benefit GC algorithm
2168  */
2169 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
2170 {
2171         struct sit_info *sit_i = SIT_I(sbi);
2172         unsigned int segno;
2173
2174         mutex_lock(&sit_i->sentry_lock);
2175
2176         sit_i->min_mtime = LLONG_MAX;
2177
2178         for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
2179                 unsigned int i;
2180                 unsigned long long mtime = 0;
2181
2182                 for (i = 0; i < sbi->segs_per_sec; i++)
2183                         mtime += get_seg_entry(sbi, segno + i)->mtime;
2184
2185                 mtime = div_u64(mtime, sbi->segs_per_sec);
2186
2187                 if (sit_i->min_mtime > mtime)
2188                         sit_i->min_mtime = mtime;
2189         }
2190         sit_i->max_mtime = get_mtime(sbi);
2191         mutex_unlock(&sit_i->sentry_lock);
2192 }
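
/*
 * Editor's note: per section the initial mtime is the arithmetic mean
 * of its segments, mtime = (sum of segment mtimes) / segs_per_sec, and
 * min_mtime is the smallest such mean; max_mtime is simply the current
 * clock value from get_mtime().
 */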
2193
2194 int build_segment_manager(struct f2fs_sb_info *sbi)
2195 {
2196         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
2197         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2198         struct f2fs_sm_info *sm_info;
2199         int err;
2200
2201         sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
2202         if (!sm_info)
2203                 return -ENOMEM;
2204
2205         /* init sm info */
2206         sbi->sm_info = sm_info;
2207         sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
2208         sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
2209         sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
2210         sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
2211         sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
2212         sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
2213         sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
2214         sm_info->rec_prefree_segments = sm_info->main_segments *
2215                                         DEF_RECLAIM_PREFREE_SEGMENTS / 100;
2216         sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
2217         sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
2218         sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
2219
2220         INIT_LIST_HEAD(&sm_info->discard_list);
2221         sm_info->nr_discards = 0;
2222         sm_info->max_discards = 0;
2223
2224         sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
2225
2226         INIT_LIST_HEAD(&sm_info->sit_entry_set);
2227
2228         if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
2229                 err = create_flush_cmd_control(sbi);
2230                 if (err)
2231                         return err;
2232         }
2233
2234         err = build_sit_info(sbi);
2235         if (err)
2236                 return err;
2237         err = build_free_segmap(sbi);
2238         if (err)
2239                 return err;
2240         err = build_curseg(sbi);
2241         if (err)
2242                 return err;
2243
2244         /* reinit free segmap based on SIT */
2245         build_sit_entries(sbi);
2246
2247         init_free_segmap(sbi);
2248         err = build_dirty_segmap(sbi);
2249         if (err)
2250                 return err;
2251
2252         init_min_max_mtime(sbi);
2253         return 0;
2254 }
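
/*
 * Editor's note: the build order above is load-bearing.
 * restore_curseg_summaries() runs only after build_sit_info(),
 * build_sit_entries() reads through the cold data journal that
 * build_curseg() restored, and init_free_segmap() plus the dirty
 * segmap then consume the populated SIT entries.
 */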
2255
2256 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
2257                 enum dirty_type dirty_type)
2258 {
2259         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2260
2261         mutex_lock(&dirty_i->seglist_lock);
2262         kfree(dirty_i->dirty_segmap[dirty_type]);
2263         dirty_i->nr_dirty[dirty_type] = 0;
2264         mutex_unlock(&dirty_i->seglist_lock);
2265 }
2266
2267 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
2268 {
2269         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2270         kfree(dirty_i->victim_secmap);
2271 }
2272
2273 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
2274 {
2275         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2276         int i;
2277
2278         if (!dirty_i)
2279                 return;
2280
2281         /* discard pre-free/dirty segments list */
2282         for (i = 0; i < NR_DIRTY_TYPE; i++)
2283                 discard_dirty_segmap(sbi, i);
2284
2285         destroy_victim_secmap(sbi);
2286         SM_I(sbi)->dirty_info = NULL;
2287         kfree(dirty_i);
2288 }
2289
2290 static void destroy_curseg(struct f2fs_sb_info *sbi)
2291 {
2292         struct curseg_info *array = SM_I(sbi)->curseg_array;
2293         int i;
2294
2295         if (!array)
2296                 return;
2297         SM_I(sbi)->curseg_array = NULL;
2298         for (i = 0; i < NR_CURSEG_TYPE; i++)
2299                 kfree(array[i].sum_blk);
2300         kfree(array);
2301 }
2302
2303 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
2304 {
2305         struct free_segmap_info *free_i = SM_I(sbi)->free_info;
2306         if (!free_i)
2307                 return;
2308         SM_I(sbi)->free_info = NULL;
2309         kfree(free_i->free_segmap);
2310         kfree(free_i->free_secmap);
2311         kfree(free_i);
2312 }
2313
2314 static void destroy_sit_info(struct f2fs_sb_info *sbi)
2315 {
2316         struct sit_info *sit_i = SIT_I(sbi);
2317         unsigned int start;
2318
2319         if (!sit_i)
2320                 return;
2321
2322         if (sit_i->sentries) {
2323                 for (start = 0; start < MAIN_SEGS(sbi); start++) {
2324                         kfree(sit_i->sentries[start].cur_valid_map);
2325                         kfree(sit_i->sentries[start].ckpt_valid_map);
2326                         kfree(sit_i->sentries[start].discard_map);
2327                 }
2328         }
2329         kfree(sit_i->tmp_map);
2330
2331         vfree(sit_i->sentries);
2332         vfree(sit_i->sec_entries);
2333         kfree(sit_i->dirty_sentries_bitmap);
2334
2335         SM_I(sbi)->sit_info = NULL;
2336         kfree(sit_i->sit_bitmap);
2337         kfree(sit_i);
2338 }
2339
2340 void destroy_segment_manager(struct f2fs_sb_info *sbi)
2341 {
2342         struct f2fs_sm_info *sm_info = SM_I(sbi);
2343
2344         if (!sm_info)
2345                 return;
2346         destroy_flush_cmd_control(sbi);
2347         destroy_dirty_segmap(sbi);
2348         destroy_curseg(sbi);
2349         destroy_free_segmap(sbi);
2350         destroy_sit_info(sbi);
2351         sbi->sm_info = NULL;
2352         kfree(sm_info);
2353 }
2354
2355 int __init create_segment_manager_caches(void)
2356 {
2357         discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
2358                         sizeof(struct discard_entry));
2359         if (!discard_entry_slab)
2360                 goto fail;
2361
2362         sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2363                         sizeof(struct sit_entry_set));
2364         if (!sit_entry_set_slab)
2365                 goto destroy_discard_entry;
2366
2367         inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
2368                         sizeof(struct inmem_pages));
2369         if (!inmem_entry_slab)
2370                 goto destroy_sit_entry_set;
2371         return 0;
2372
2373 destroy_sit_entry_set:
2374         kmem_cache_destroy(sit_entry_set_slab);
2375 destroy_discard_entry:
2376         kmem_cache_destroy(discard_entry_slab);
2377 fail:
2378         return -ENOMEM;
2379 }
2380
2381 void destroy_segment_manager_caches(void)
2382 {
2383         kmem_cache_destroy(sit_entry_set_slab);
2384         kmem_cache_destroy(discard_entry_slab);
2385         kmem_cache_destroy(inmem_entry_slab);
2386 }