/*
 * fs/f2fs/super.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/statfs.h>
#include <linux/buffer_head.h>
#include <linux/backing-dev.h>
#include <linux/kthread.h>
#include <linux/parser.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/exportfs.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/sysfs.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "xattr.h"
#include "gc.h"
#include "trace.h"

#define CREATE_TRACE_POINTS
#include <trace/events/f2fs.h>

static struct proc_dir_entry *f2fs_proc_root;
static struct kmem_cache *f2fs_inode_cachep;
static struct kset *f2fs_kset;

#ifdef CONFIG_F2FS_FAULT_INJECTION

char *fault_name[FAULT_MAX] = {
        [FAULT_KMALLOC]         = "kmalloc",
        [FAULT_PAGE_ALLOC]      = "page alloc",
        [FAULT_ALLOC_NID]       = "alloc nid",
        [FAULT_ORPHAN]          = "orphan",
        [FAULT_BLOCK]           = "no more block",
        [FAULT_DIR_DEPTH]       = "too big dir depth",
        [FAULT_EVICT_INODE]     = "evict_inode fail",
        [FAULT_IO]              = "IO error",
        [FAULT_CHECKPOINT]      = "checkpoint error",
};

static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi,
                                                unsigned int rate)
{
        struct f2fs_fault_info *ffi = &sbi->fault_info;

        if (rate) {
                atomic_set(&ffi->inject_ops, 0);
                ffi->inject_rate = rate;
                ffi->inject_type = (1 << FAULT_MAX) - 1;
        } else {
                memset(ffi, 0, sizeof(struct f2fs_fault_info));
        }
}
#endif
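
/*
 * Illustrative sketch (added note, not in the original source): mounting
 * with fault_injection=1000 ends up here as
 *
 *      f2fs_build_fault_attr(sbi, 1000);
 *
 * which sets inject_rate to 1000 (roughly one injected fault per 1000
 * injectable operations) and inject_type to (1 << FAULT_MAX) - 1, i.e.
 * one bit per fault_name[] entry, enabling all nine fault types at once.
 * Individual types can be masked later through the inject_type attribute.
 */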

/* f2fs-wide shrinker description */
static struct shrinker f2fs_shrinker_info = {
        .scan_objects = f2fs_shrink_scan,
        .count_objects = f2fs_shrink_count,
        .seeks = DEFAULT_SEEKS,
};

enum {
        Opt_gc_background,
        Opt_disable_roll_forward,
        Opt_norecovery,
        Opt_discard,
        Opt_nodiscard,
        Opt_noheap,
        Opt_user_xattr,
        Opt_nouser_xattr,
        Opt_acl,
        Opt_noacl,
        Opt_active_logs,
        Opt_disable_ext_identify,
        Opt_inline_xattr,
        Opt_noinline_xattr,
        Opt_inline_data,
        Opt_inline_dentry,
        Opt_noinline_dentry,
        Opt_flush_merge,
        Opt_noflush_merge,
        Opt_nobarrier,
        Opt_fastboot,
        Opt_extent_cache,
        Opt_noextent_cache,
        Opt_noinline_data,
        Opt_data_flush,
        Opt_mode,
        Opt_io_size_bits,
        Opt_fault_injection,
        Opt_lazytime,
        Opt_nolazytime,
        Opt_err,
};

static match_table_t f2fs_tokens = {
        {Opt_gc_background, "background_gc=%s"},
        {Opt_disable_roll_forward, "disable_roll_forward"},
        {Opt_norecovery, "norecovery"},
        {Opt_discard, "discard"},
        {Opt_nodiscard, "nodiscard"},
        {Opt_noheap, "no_heap"},
        {Opt_user_xattr, "user_xattr"},
        {Opt_nouser_xattr, "nouser_xattr"},
        {Opt_acl, "acl"},
        {Opt_noacl, "noacl"},
        {Opt_active_logs, "active_logs=%u"},
        {Opt_disable_ext_identify, "disable_ext_identify"},
        {Opt_inline_xattr, "inline_xattr"},
        {Opt_noinline_xattr, "noinline_xattr"},
        {Opt_inline_data, "inline_data"},
        {Opt_inline_dentry, "inline_dentry"},
        {Opt_noinline_dentry, "noinline_dentry"},
        {Opt_flush_merge, "flush_merge"},
        {Opt_noflush_merge, "noflush_merge"},
        {Opt_nobarrier, "nobarrier"},
        {Opt_fastboot, "fastboot"},
        {Opt_extent_cache, "extent_cache"},
        {Opt_noextent_cache, "noextent_cache"},
        {Opt_noinline_data, "noinline_data"},
        {Opt_data_flush, "data_flush"},
        {Opt_mode, "mode=%s"},
        {Opt_io_size_bits, "io_bits=%u"},
        {Opt_fault_injection, "fault_injection=%u"},
        {Opt_lazytime, "lazytime"},
        {Opt_nolazytime, "nolazytime"},
        {Opt_err, NULL},
};
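
/*
 * For illustration (hypothetical command, not from this file): a mount
 * invocation exercising several of the tokens above could look like
 *
 *      mount -t f2fs -o background_gc=sync,discard,active_logs=6 \
 *              /dev/sdb1 /mnt/f2fs
 *
 * parse_options() below splits the option string at commas and matches
 * each token against this table.
 */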

/* Sysfs support for f2fs */
enum {
        GC_THREAD,      /* struct f2fs_gc_thread */
        SM_INFO,        /* struct f2fs_sm_info */
        DCC_INFO,       /* struct discard_cmd_control */
        NM_INFO,        /* struct f2fs_nm_info */
        F2FS_SBI,       /* struct f2fs_sb_info */
#ifdef CONFIG_F2FS_FAULT_INJECTION
        FAULT_INFO_RATE,        /* struct f2fs_fault_info */
        FAULT_INFO_TYPE,        /* struct f2fs_fault_info */
#endif
};

struct f2fs_attr {
        struct attribute attr;
        ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
        ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
                         const char *, size_t);
        int struct_type;
        int offset;
};

static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
{
        if (struct_type == GC_THREAD)
                return (unsigned char *)sbi->gc_thread;
        else if (struct_type == SM_INFO)
                return (unsigned char *)SM_I(sbi);
        else if (struct_type == DCC_INFO)
                return (unsigned char *)SM_I(sbi)->dcc_info;
        else if (struct_type == NM_INFO)
                return (unsigned char *)NM_I(sbi);
        else if (struct_type == F2FS_SBI)
                return (unsigned char *)sbi;
#ifdef CONFIG_F2FS_FAULT_INJECTION
        else if (struct_type == FAULT_INFO_RATE ||
                                        struct_type == FAULT_INFO_TYPE)
                return (unsigned char *)&sbi->fault_info;
#endif
        return NULL;
}

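/*
 * Editorial note (assumption): BD_PART_WRITTEN() is defined in f2fs.h and
 * is assumed to read the partition's written-sector count accumulated
 * since mount, halved to convert 512-byte sectors to KB; added to
 * sbi->kbytes_written (carried over via the checkpoint), it yields the
 * lifetime write volume reported below.
 */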
static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
                struct f2fs_sb_info *sbi, char *buf)
{
        struct super_block *sb = sbi->sb;

        if (!sb->s_bdev->bd_part)
                return snprintf(buf, PAGE_SIZE, "0\n");

        return snprintf(buf, PAGE_SIZE, "%llu\n",
                (unsigned long long)(sbi->kbytes_written +
                        BD_PART_WRITTEN(sbi)));
}

static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
                        struct f2fs_sb_info *sbi, char *buf)
{
        unsigned char *ptr = NULL;
        unsigned int *ui;

        ptr = __struct_ptr(sbi, a->struct_type);
        if (!ptr)
                return -EINVAL;

        ui = (unsigned int *)(ptr + a->offset);

        return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
}

static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
                        struct f2fs_sb_info *sbi,
                        const char *buf, size_t count)
{
        unsigned char *ptr;
        unsigned long t;
        unsigned int *ui;
        ssize_t ret;

        ptr = __struct_ptr(sbi, a->struct_type);
        if (!ptr)
                return -EINVAL;

        ui = (unsigned int *)(ptr + a->offset);

        ret = kstrtoul(skip_spaces(buf), 0, &t);
        if (ret < 0)
                return ret;
#ifdef CONFIG_F2FS_FAULT_INJECTION
        if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
                return -EINVAL;
#endif
        *ui = t;
        return count;
}

static ssize_t f2fs_attr_show(struct kobject *kobj,
                                struct attribute *attr, char *buf)
{
        struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
                                                                s_kobj);
        struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);

        return a->show ? a->show(a, sbi, buf) : 0;
}

static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr,
                                                const char *buf, size_t len)
{
        struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
                                                                        s_kobj);
        struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);

        return a->store ? a->store(a, sbi, buf, len) : 0;
}

static void f2fs_sb_release(struct kobject *kobj)
{
        struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
                                                                s_kobj);
        complete(&sbi->s_kobj_unregister);
}

#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
static struct f2fs_attr f2fs_attr_##_name = {                   \
        .attr = {.name = __stringify(_name), .mode = _mode },   \
        .show   = _show,                                        \
        .store  = _store,                                       \
        .struct_type = _struct_type,                            \
        .offset = _offset                                       \
}

#define F2FS_RW_ATTR(struct_type, struct_name, name, elname)    \
        F2FS_ATTR_OFFSET(struct_type, name, 0644,               \
                f2fs_sbi_show, f2fs_sbi_store,                  \
                offsetof(struct struct_name, elname))

#define F2FS_GENERAL_RO_ATTR(name) \
static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
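
/*
 * Expansion sketch (added for illustration): F2FS_RW_ATTR(GC_THREAD,
 * f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time) defines roughly
 *
 *      static struct f2fs_attr f2fs_attr_gc_min_sleep_time = {
 *              .attr = { .name = "gc_min_sleep_time", .mode = 0644 },
 *              .show = f2fs_sbi_show,
 *              .store = f2fs_sbi_store,
 *              .struct_type = GC_THREAD,
 *              .offset = offsetof(struct f2fs_gc_kthread, min_sleep_time),
 *      };
 *
 * so that the generic show/store helpers can reach the field through
 * __struct_ptr() plus the recorded offset.
 */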

F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
#ifdef CONFIG_F2FS_FAULT_INJECTION
F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
#endif
F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);

#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(gc_min_sleep_time),
        ATTR_LIST(gc_max_sleep_time),
        ATTR_LIST(gc_no_gc_sleep_time),
        ATTR_LIST(gc_idle),
        ATTR_LIST(reclaim_segments),
        ATTR_LIST(max_small_discards),
        ATTR_LIST(batched_trim_sections),
        ATTR_LIST(ipu_policy),
        ATTR_LIST(min_ipu_util),
        ATTR_LIST(min_fsync_blocks),
        ATTR_LIST(max_victim_search),
        ATTR_LIST(dir_level),
        ATTR_LIST(ram_thresh),
        ATTR_LIST(ra_nid_pages),
        ATTR_LIST(dirty_nats_ratio),
        ATTR_LIST(cp_interval),
        ATTR_LIST(idle_interval),
#ifdef CONFIG_F2FS_FAULT_INJECTION
        ATTR_LIST(inject_rate),
        ATTR_LIST(inject_type),
#endif
        ATTR_LIST(lifetime_write_kbytes),
        NULL,
};

static const struct sysfs_ops f2fs_attr_ops = {
        .show   = f2fs_attr_show,
        .store  = f2fs_attr_store,
};

static struct kobj_type f2fs_ktype = {
        .default_attrs  = f2fs_attrs,
        .sysfs_ops      = &f2fs_attr_ops,
        .release        = f2fs_sb_release,
};
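
/*
 * Usage sketch (assumed device name, for illustration): once a
 * filesystem's kobject is registered under f2fs_kset, the attributes
 * above appear per instance, e.g.
 *
 *      cat /sys/fs/f2fs/sdb1/gc_min_sleep_time
 *      echo 2000 > /sys/fs/f2fs/sdb1/gc_min_sleep_time
 *
 * A write travels f2fs_attr_store() -> f2fs_sbi_store(), which parses the
 * value with kstrtoul() and stores it at struct_type/offset.
 */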

void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
{
        struct va_format vaf;
        va_list args;

        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
        printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
        va_end(args);
}

static void init_once(void *foo)
{
        struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;

        inode_init_once(&fi->vfs_inode);
}

static int parse_options(struct super_block *sb, char *options)
{
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
        struct request_queue *q;
        substring_t args[MAX_OPT_ARGS];
        char *p, *name;
        int arg = 0;

        if (!options)
                return 0;

        while ((p = strsep(&options, ",")) != NULL) {
                int token;
                if (!*p)
                        continue;
                /*
                 * Initialize args struct so we know whether arg was
                 * found; some options take optional arguments.
                 */
                args[0].to = args[0].from = NULL;
                token = match_token(p, f2fs_tokens, args);

                switch (token) {
                case Opt_gc_background:
                        name = match_strdup(&args[0]);

                        if (!name)
                                return -ENOMEM;
                        if (strlen(name) == 2 && !strncmp(name, "on", 2)) {
                                set_opt(sbi, BG_GC);
                                clear_opt(sbi, FORCE_FG_GC);
                        } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) {
                                clear_opt(sbi, BG_GC);
                                clear_opt(sbi, FORCE_FG_GC);
                        } else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) {
                                set_opt(sbi, BG_GC);
                                set_opt(sbi, FORCE_FG_GC);
                        } else {
                                kfree(name);
                                return -EINVAL;
                        }
                        kfree(name);
                        break;
                case Opt_disable_roll_forward:
                        set_opt(sbi, DISABLE_ROLL_FORWARD);
                        break;
                case Opt_norecovery:
                        /* this option requires a read-only mount */
                        set_opt(sbi, DISABLE_ROLL_FORWARD);
                        if (!f2fs_readonly(sb))
                                return -EINVAL;
                        break;
                case Opt_discard:
                        q = bdev_get_queue(sb->s_bdev);
                        if (blk_queue_discard(q)) {
                                set_opt(sbi, DISCARD);
                        } else if (!f2fs_sb_mounted_blkzoned(sb)) {
                                f2fs_msg(sb, KERN_WARNING,
                                        "mounting with \"discard\" option, but "
                                        "the device does not support discard");
                        }
                        break;
                case Opt_nodiscard:
                        if (f2fs_sb_mounted_blkzoned(sb)) {
                                f2fs_msg(sb, KERN_WARNING,
                                        "discard is required for zoned block devices");
                                return -EINVAL;
                        }
                        clear_opt(sbi, DISCARD);
                        break;
                case Opt_noheap:
                        set_opt(sbi, NOHEAP);
                        break;
#ifdef CONFIG_F2FS_FS_XATTR
                case Opt_user_xattr:
                        set_opt(sbi, XATTR_USER);
                        break;
                case Opt_nouser_xattr:
                        clear_opt(sbi, XATTR_USER);
                        break;
                case Opt_inline_xattr:
                        set_opt(sbi, INLINE_XATTR);
                        break;
                case Opt_noinline_xattr:
                        clear_opt(sbi, INLINE_XATTR);
                        break;
#else
                case Opt_user_xattr:
                        f2fs_msg(sb, KERN_INFO,
                                "user_xattr options not supported");
                        break;
                case Opt_nouser_xattr:
                        f2fs_msg(sb, KERN_INFO,
                                "nouser_xattr options not supported");
                        break;
                case Opt_inline_xattr:
                        f2fs_msg(sb, KERN_INFO,
                                "inline_xattr options not supported");
                        break;
                case Opt_noinline_xattr:
                        f2fs_msg(sb, KERN_INFO,
                                "noinline_xattr options not supported");
                        break;
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
                case Opt_acl:
                        set_opt(sbi, POSIX_ACL);
                        break;
                case Opt_noacl:
                        clear_opt(sbi, POSIX_ACL);
                        break;
#else
                case Opt_acl:
                        f2fs_msg(sb, KERN_INFO, "acl options not supported");
                        break;
                case Opt_noacl:
                        f2fs_msg(sb, KERN_INFO, "noacl options not supported");
                        break;
#endif
                case Opt_active_logs:
                        if (args->from && match_int(args, &arg))
                                return -EINVAL;
                        if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
                                return -EINVAL;
                        sbi->active_logs = arg;
                        break;
                case Opt_disable_ext_identify:
                        set_opt(sbi, DISABLE_EXT_IDENTIFY);
                        break;
                case Opt_inline_data:
                        set_opt(sbi, INLINE_DATA);
                        break;
                case Opt_inline_dentry:
                        set_opt(sbi, INLINE_DENTRY);
                        break;
                case Opt_noinline_dentry:
                        clear_opt(sbi, INLINE_DENTRY);
                        break;
                case Opt_flush_merge:
                        set_opt(sbi, FLUSH_MERGE);
                        break;
                case Opt_noflush_merge:
                        clear_opt(sbi, FLUSH_MERGE);
                        break;
                case Opt_nobarrier:
                        set_opt(sbi, NOBARRIER);
                        break;
                case Opt_fastboot:
                        set_opt(sbi, FASTBOOT);
                        break;
                case Opt_extent_cache:
                        set_opt(sbi, EXTENT_CACHE);
                        break;
                case Opt_noextent_cache:
                        clear_opt(sbi, EXTENT_CACHE);
                        break;
                case Opt_noinline_data:
                        clear_opt(sbi, INLINE_DATA);
                        break;
                case Opt_data_flush:
                        set_opt(sbi, DATA_FLUSH);
                        break;
                case Opt_mode:
                        name = match_strdup(&args[0]);

                        if (!name)
                                return -ENOMEM;
                        if (strlen(name) == 8 &&
                                        !strncmp(name, "adaptive", 8)) {
                                if (f2fs_sb_mounted_blkzoned(sb)) {
                                        f2fs_msg(sb, KERN_WARNING,
                                                 "adaptive mode is not allowed with "
                                                 "zoned block device feature");
                                        kfree(name);
                                        return -EINVAL;
                                }
                                set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
                        } else if (strlen(name) == 3 &&
                                        !strncmp(name, "lfs", 3)) {
                                set_opt_mode(sbi, F2FS_MOUNT_LFS);
                        } else {
                                kfree(name);
                                return -EINVAL;
                        }
                        kfree(name);
                        break;
                case Opt_io_size_bits:
                        if (args->from && match_int(args, &arg))
                                return -EINVAL;
                        if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
                                f2fs_msg(sb, KERN_WARNING,
                                        "Not support %d, larger than %d",
                                        1 << arg, BIO_MAX_PAGES);
                                return -EINVAL;
                        }
                        sbi->write_io_size_bits = arg;
                        break;
                case Opt_fault_injection:
                        if (args->from && match_int(args, &arg))
                                return -EINVAL;
#ifdef CONFIG_F2FS_FAULT_INJECTION
                        f2fs_build_fault_attr(sbi, arg);
                        set_opt(sbi, FAULT_INJECTION);
#else
                        f2fs_msg(sb, KERN_INFO,
                                "FAULT_INJECTION was not selected");
#endif
                        break;
                case Opt_lazytime:
                        sb->s_flags |= MS_LAZYTIME;
                        break;
                case Opt_nolazytime:
                        sb->s_flags &= ~MS_LAZYTIME;
                        break;
                default:
                        f2fs_msg(sb, KERN_ERR,
                                "Unrecognized mount option \"%s\" or missing value",
                                p);
                        return -EINVAL;
                }
        }

        if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
                f2fs_msg(sb, KERN_ERR,
                                "Should set mode=lfs with %uKB-sized IO",
                                F2FS_IO_SIZE_KB(sbi));
                return -EINVAL;
        }
        return 0;
}

static struct inode *f2fs_alloc_inode(struct super_block *sb)
{
        struct f2fs_inode_info *fi;

        fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);
        if (!fi)
                return NULL;

        init_once((void *) fi);

        /* Initialize f2fs-specific inode info */
        fi->vfs_inode.i_version = 1;
        atomic_set(&fi->dirty_pages, 0);
        fi->i_current_depth = 1;
        fi->i_advise = 0;
        init_rwsem(&fi->i_sem);
        INIT_LIST_HEAD(&fi->dirty_list);
        INIT_LIST_HEAD(&fi->gdirty_list);
        INIT_LIST_HEAD(&fi->inmem_pages);
        mutex_init(&fi->inmem_lock);
        init_rwsem(&fi->dio_rwsem[READ]);
        init_rwsem(&fi->dio_rwsem[WRITE]);

        /* Will be used by directory only */
        fi->i_dir_level = F2FS_SB(sb)->dir_level;
        return &fi->vfs_inode;
}

static int f2fs_drop_inode(struct inode *inode)
{
        int ret;
        /*
         * This is to avoid a deadlock condition like below.
         * writeback_single_inode(inode)
         *  - f2fs_write_data_page
         *    - f2fs_gc -> iput -> evict
         *       - inode_wait_for_writeback(inode)
         */
        if ((!inode_unhashed(inode) && inode->i_state & I_SYNC)) {
                if (!inode->i_nlink && !is_bad_inode(inode)) {
                        /* to avoid calling evict_inode simultaneously */
                        atomic_inc(&inode->i_count);
                        spin_unlock(&inode->i_lock);

                        /* any remaining atomic pages should be discarded */
                        if (f2fs_is_atomic_file(inode))
                                drop_inmem_pages(inode);

                        /* fi->extent_tree should be kept for writepage */
                        f2fs_destroy_extent_node(inode);

                        sb_start_intwrite(inode->i_sb);
                        f2fs_i_size_write(inode, 0);

                        if (F2FS_HAS_BLOCKS(inode))
                                f2fs_truncate(inode);

                        sb_end_intwrite(inode->i_sb);

                        fscrypt_put_encryption_info(inode, NULL);
                        spin_lock(&inode->i_lock);
                        atomic_dec(&inode->i_count);
                }
                trace_f2fs_drop_inode(inode, 0);
                return 0;
        }
        ret = generic_drop_inode(inode);
        trace_f2fs_drop_inode(inode, ret);
        return ret;
}

int f2fs_inode_dirtied(struct inode *inode, bool sync)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        int ret = 0;

        spin_lock(&sbi->inode_lock[DIRTY_META]);
        if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
                ret = 1;
        } else {
                set_inode_flag(inode, FI_DIRTY_INODE);
                stat_inc_dirty_inode(sbi, DIRTY_META);
        }
        if (sync && list_empty(&F2FS_I(inode)->gdirty_list)) {
                list_add_tail(&F2FS_I(inode)->gdirty_list,
                                &sbi->inode_list[DIRTY_META]);
                inc_page_count(sbi, F2FS_DIRTY_IMETA);
        }
        spin_unlock(&sbi->inode_lock[DIRTY_META]);
        return ret;
}

void f2fs_inode_synced(struct inode *inode)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

        spin_lock(&sbi->inode_lock[DIRTY_META]);
        if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) {
                spin_unlock(&sbi->inode_lock[DIRTY_META]);
                return;
        }
        if (!list_empty(&F2FS_I(inode)->gdirty_list)) {
                list_del_init(&F2FS_I(inode)->gdirty_list);
                dec_page_count(sbi, F2FS_DIRTY_IMETA);
        }
        clear_inode_flag(inode, FI_DIRTY_INODE);
        clear_inode_flag(inode, FI_AUTO_RECOVER);
        stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META);
        spin_unlock(&sbi->inode_lock[DIRTY_META]);
}

/*
 * f2fs_dirty_inode() is called from __mark_inode_dirty()
 *
 * We should call set_dirty_inode to write the dirty inode through write_inode.
 */
static void f2fs_dirty_inode(struct inode *inode, int flags)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

        if (inode->i_ino == F2FS_NODE_INO(sbi) ||
                        inode->i_ino == F2FS_META_INO(sbi))
                return;

        if (flags == I_DIRTY_TIME)
                return;

        if (is_inode_flag_set(inode, FI_AUTO_RECOVER))
                clear_inode_flag(inode, FI_AUTO_RECOVER);

        f2fs_inode_dirtied(inode, false);
}

static void f2fs_i_callback(struct rcu_head *head)
{
        struct inode *inode = container_of(head, struct inode, i_rcu);
        kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode));
}

static void f2fs_destroy_inode(struct inode *inode)
{
        call_rcu(&inode->i_rcu, f2fs_i_callback);
}

static void destroy_percpu_info(struct f2fs_sb_info *sbi)
{
        percpu_counter_destroy(&sbi->alloc_valid_block_count);
        percpu_counter_destroy(&sbi->total_valid_inode_count);
}

static void destroy_device_list(struct f2fs_sb_info *sbi)
{
        int i;

        for (i = 0; i < sbi->s_ndevs; i++) {
                blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
                kfree(FDEV(i).blkz_type);
#endif
        }
        kfree(sbi->devs);
}

static void f2fs_put_super(struct super_block *sb)
{
        struct f2fs_sb_info *sbi = F2FS_SB(sb);

        if (sbi->s_proc) {
                remove_proc_entry("segment_info", sbi->s_proc);
                remove_proc_entry("segment_bits", sbi->s_proc);
                remove_proc_entry(sb->s_id, f2fs_proc_root);
        }
        kobject_del(&sbi->s_kobj);

        stop_gc_thread(sbi);

        /* prevent remaining shrinker jobs */
        mutex_lock(&sbi->umount_mutex);

        /*
         * We don't need to do a checkpoint when the superblock is clean.
         * But if the previous checkpoint was not done by umount, we need
         * to do a clean checkpoint again.
         */
        if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
                        !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
                struct cp_control cpc = {
                        .reason = CP_UMOUNT,
                };
                write_checkpoint(sbi, &cpc);
        }

        /* be sure to wait for any on-going discard commands */
        f2fs_wait_discard_bio(sbi, NULL_ADDR);

        /* write_checkpoint can update stat information */
        f2fs_destroy_stats(sbi);

        /*
         * Normally the superblock is clean, so we need to release this.
         * In addition, EIO will skip the checkpoint, so we need this as well.
         */
        release_ino_entry(sbi, true);

        f2fs_leave_shrinker(sbi);
        mutex_unlock(&sbi->umount_mutex);

        /* in our cp_error case, we can wait for any writeback pages */
        f2fs_flush_merged_bios(sbi);

        iput(sbi->node_inode);
        iput(sbi->meta_inode);

        /* destroy f2fs internal modules */
        destroy_node_manager(sbi);
        destroy_segment_manager(sbi);

        kfree(sbi->ckpt);
        kobject_put(&sbi->s_kobj);
        wait_for_completion(&sbi->s_kobj_unregister);

        sb->s_fs_info = NULL;
        if (sbi->s_chksum_driver)
                crypto_free_shash(sbi->s_chksum_driver);
        kfree(sbi->raw_super);

        destroy_device_list(sbi);
        mempool_destroy(sbi->write_io_dummy);
        destroy_percpu_info(sbi);
        kfree(sbi);
}

int f2fs_sync_fs(struct super_block *sb, int sync)
{
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
        int err = 0;

        trace_f2fs_sync_fs(sb, sync);

        if (sync) {
                struct cp_control cpc;

                cpc.reason = __get_cp_reason(sbi);

                mutex_lock(&sbi->gc_mutex);
                err = write_checkpoint(sbi, &cpc);
                mutex_unlock(&sbi->gc_mutex);
        }
        f2fs_trace_ios(NULL, 1);

        return err;
}

static int f2fs_freeze(struct super_block *sb)
{
        if (f2fs_readonly(sb))
                return 0;

        /* IO error happened before */
        if (unlikely(f2fs_cp_error(F2FS_SB(sb))))
                return -EIO;

        /* must be clean, since sync_filesystem() was already called */
        if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY))
                return -EINVAL;
        return 0;
}

static int f2fs_unfreeze(struct super_block *sb)
{
        return 0;
}

static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
        struct super_block *sb = dentry->d_sb;
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
        u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
        block_t total_count, user_block_count, start_count, ovp_count;

        total_count = le64_to_cpu(sbi->raw_super->block_count);
        user_block_count = sbi->user_block_count;
        start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr);
        ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
        buf->f_type = F2FS_SUPER_MAGIC;
        buf->f_bsize = sbi->blocksize;

        buf->f_blocks = total_count - start_count;
        buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
        buf->f_bavail = user_block_count - valid_user_blocks(sbi);

        buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
        buf->f_ffree = min(buf->f_files - valid_node_count(sbi),
                                                        buf->f_bavail);

        buf->f_namelen = F2FS_NAME_LEN;
        buf->f_fsid.val[0] = (u32)id;
        buf->f_fsid.val[1] = (u32)(id >> 32);

        return 0;
}

static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
{
        struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);

        if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) {
                if (test_opt(sbi, FORCE_FG_GC))
                        seq_printf(seq, ",background_gc=%s", "sync");
                else
                        seq_printf(seq, ",background_gc=%s", "on");
        } else {
                seq_printf(seq, ",background_gc=%s", "off");
        }
        if (test_opt(sbi, DISABLE_ROLL_FORWARD))
                seq_puts(seq, ",disable_roll_forward");
        if (test_opt(sbi, DISCARD))
                seq_puts(seq, ",discard");
        if (test_opt(sbi, NOHEAP))
                seq_puts(seq, ",no_heap_alloc");
#ifdef CONFIG_F2FS_FS_XATTR
        if (test_opt(sbi, XATTR_USER))
                seq_puts(seq, ",user_xattr");
        else
                seq_puts(seq, ",nouser_xattr");
        if (test_opt(sbi, INLINE_XATTR))
                seq_puts(seq, ",inline_xattr");
        else
                seq_puts(seq, ",noinline_xattr");
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
        if (test_opt(sbi, POSIX_ACL))
                seq_puts(seq, ",acl");
        else
                seq_puts(seq, ",noacl");
#endif
        if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
                seq_puts(seq, ",disable_ext_identify");
        if (test_opt(sbi, INLINE_DATA))
                seq_puts(seq, ",inline_data");
        else
                seq_puts(seq, ",noinline_data");
        if (test_opt(sbi, INLINE_DENTRY))
                seq_puts(seq, ",inline_dentry");
        else
                seq_puts(seq, ",noinline_dentry");
        if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
                seq_puts(seq, ",flush_merge");
        if (test_opt(sbi, NOBARRIER))
                seq_puts(seq, ",nobarrier");
        if (test_opt(sbi, FASTBOOT))
                seq_puts(seq, ",fastboot");
        if (test_opt(sbi, EXTENT_CACHE))
                seq_puts(seq, ",extent_cache");
        else
                seq_puts(seq, ",noextent_cache");
        if (test_opt(sbi, DATA_FLUSH))
                seq_puts(seq, ",data_flush");

        seq_puts(seq, ",mode=");
        if (test_opt(sbi, ADAPTIVE))
                seq_puts(seq, "adaptive");
        else if (test_opt(sbi, LFS))
                seq_puts(seq, "lfs");
        seq_printf(seq, ",active_logs=%u", sbi->active_logs);
        if (F2FS_IO_SIZE_BITS(sbi))
                seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
#ifdef CONFIG_F2FS_FAULT_INJECTION
        if (test_opt(sbi, FAULT_INJECTION))
                seq_puts(seq, ",fault_injection");
#endif

        return 0;
}

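/*
 * Illustrative output (hypothetical values): /proc/fs/f2fs/<dev>/segment_info
 * prints ten type|valid_blocks pairs per row, e.g.
 *
 *      format: segment_type|valid_blocks
 *      segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)
 *      0         2|511 2|512 0|0   1|37  ...
 */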
static int segment_info_seq_show(struct seq_file *seq, void *offset)
{
        struct super_block *sb = seq->private;
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
        unsigned int total_segs =
                        le32_to_cpu(sbi->raw_super->segment_count_main);
        int i;

        seq_puts(seq, "format: segment_type|valid_blocks\n"
                "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");

        for (i = 0; i < total_segs; i++) {
                struct seg_entry *se = get_seg_entry(sbi, i);

                if ((i % 10) == 0)
                        seq_printf(seq, "%-10d", i);
                seq_printf(seq, "%d|%-3u", se->type,
                                        get_valid_blocks(sbi, i, 1));
                if ((i % 10) == 9 || i == (total_segs - 1))
                        seq_putc(seq, '\n');
                else
                        seq_putc(seq, ' ');
        }

        return 0;
}

static int segment_bits_seq_show(struct seq_file *seq, void *offset)
{
        struct super_block *sb = seq->private;
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
        unsigned int total_segs =
                        le32_to_cpu(sbi->raw_super->segment_count_main);
        int i, j;

        seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n"
                "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");

        for (i = 0; i < total_segs; i++) {
                struct seg_entry *se = get_seg_entry(sbi, i);

                seq_printf(seq, "%-10d", i);
                seq_printf(seq, "%d|%-3u|", se->type,
                                        get_valid_blocks(sbi, i, 1));
                for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
                        seq_printf(seq, " %.2x", se->cur_valid_map[j]);
                seq_putc(seq, '\n');
        }
        return 0;
}

#define F2FS_PROC_FILE_DEF(_name)                                       \
static int _name##_open_fs(struct inode *inode, struct file *file)      \
{                                                                       \
        return single_open(file, _name##_seq_show, PDE_DATA(inode));    \
}                                                                       \
                                                                        \
static const struct file_operations f2fs_seq_##_name##_fops = {         \
        .open = _name##_open_fs,                                        \
        .read = seq_read,                                               \
        .llseek = seq_lseek,                                            \
        .release = single_release,                                      \
};

F2FS_PROC_FILE_DEF(segment_info);
F2FS_PROC_FILE_DEF(segment_bits);

static void default_options(struct f2fs_sb_info *sbi)
{
        /* init some FS parameters */
        sbi->active_logs = NR_CURSEG_TYPE;

        set_opt(sbi, BG_GC);
        set_opt(sbi, INLINE_XATTR);
        set_opt(sbi, INLINE_DATA);
        set_opt(sbi, INLINE_DENTRY);
        set_opt(sbi, EXTENT_CACHE);
        sbi->sb->s_flags |= MS_LAZYTIME;
        set_opt(sbi, FLUSH_MERGE);
        if (f2fs_sb_mounted_blkzoned(sbi->sb)) {
                set_opt_mode(sbi, F2FS_MOUNT_LFS);
                set_opt(sbi, DISCARD);
        } else {
                set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
        }

#ifdef CONFIG_F2FS_FS_XATTR
        set_opt(sbi, XATTR_USER);
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
        set_opt(sbi, POSIX_ACL);
#endif

#ifdef CONFIG_F2FS_FAULT_INJECTION
        f2fs_build_fault_attr(sbi, 0);
#endif
}

static int f2fs_remount(struct super_block *sb, int *flags, char *data)
{
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
        struct f2fs_mount_info org_mount_opt;
        int err, active_logs;
        bool need_restart_gc = false;
        bool need_stop_gc = false;
        bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
#ifdef CONFIG_F2FS_FAULT_INJECTION
        struct f2fs_fault_info ffi = sbi->fault_info;
#endif

        /*
         * Save the old mount options in case we
         * need to restore them.
         */
        org_mount_opt = sbi->mount_opt;
        active_logs = sbi->active_logs;

        /* recover superblocks we couldn't write due to previous RO mount */
        if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
                err = f2fs_commit_super(sbi, false);
                f2fs_msg(sb, KERN_INFO,
                        "Try to recover all the superblocks, ret: %d", err);
                if (!err)
                        clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
        }

        sbi->mount_opt.opt = 0;
        default_options(sbi);

        /* parse mount options */
        err = parse_options(sb, data);
        if (err)
                goto restore_opts;

        /*
         * Previous and new state of filesystem is RO,
         * so skip checking GC and FLUSH_MERGE conditions.
         */
        if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
                goto skip;

        /* disallow enabling/disabling extent_cache dynamically */
        if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
                err = -EINVAL;
                f2fs_msg(sbi->sb, KERN_WARNING,
                                "switch extent_cache option is not allowed");
                goto restore_opts;
        }

        /*
         * We stop the GC thread if FS is mounted as RO
         * or if background_gc = off is passed in mount
         * option. Also sync the filesystem.
         */
        if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) {
                if (sbi->gc_thread) {
                        stop_gc_thread(sbi);
                        need_restart_gc = true;
                }
        } else if (!sbi->gc_thread) {
                err = start_gc_thread(sbi);
                if (err)
                        goto restore_opts;
                need_stop_gc = true;
        }

        if (*flags & MS_RDONLY) {
                writeback_inodes_sb(sb, WB_REASON_SYNC);
                sync_inodes_sb(sb);

                set_sbi_flag(sbi, SBI_IS_DIRTY);
                set_sbi_flag(sbi, SBI_IS_CLOSE);
                f2fs_sync_fs(sb, 1);
                clear_sbi_flag(sbi, SBI_IS_CLOSE);
        }

        /*
         * We stop the issue-flush thread if FS is mounted as RO
         * or if flush_merge is not passed in the mount options.
         */
        if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
                clear_opt(sbi, FLUSH_MERGE);
                destroy_flush_cmd_control(sbi, false);
        } else {
                err = create_flush_cmd_control(sbi);
                if (err)
                        goto restore_gc;
        }
skip:
        /* Update the POSIXACL Flag */
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
                (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);

        return 0;
restore_gc:
        if (need_restart_gc) {
                if (start_gc_thread(sbi))
                        f2fs_msg(sbi->sb, KERN_WARNING,
                                "background gc thread has stopped");
        } else if (need_stop_gc) {
                stop_gc_thread(sbi);
        }
restore_opts:
        sbi->mount_opt = org_mount_opt;
        sbi->active_logs = active_logs;
#ifdef CONFIG_F2FS_FAULT_INJECTION
        sbi->fault_info = ffi;
#endif
        return err;
}

static struct super_operations f2fs_sops = {
        .alloc_inode    = f2fs_alloc_inode,
        .drop_inode     = f2fs_drop_inode,
        .destroy_inode  = f2fs_destroy_inode,
        .write_inode    = f2fs_write_inode,
        .dirty_inode    = f2fs_dirty_inode,
        .show_options   = f2fs_show_options,
        .evict_inode    = f2fs_evict_inode,
        .put_super      = f2fs_put_super,
        .sync_fs        = f2fs_sync_fs,
        .freeze_fs      = f2fs_freeze,
        .unfreeze_fs    = f2fs_unfreeze,
        .statfs         = f2fs_statfs,
        .remount_fs     = f2fs_remount,
};

#ifdef CONFIG_F2FS_FS_ENCRYPTION
static int f2fs_get_context(struct inode *inode, void *ctx, size_t len)
{
        return f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
                                F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
                                ctx, len, NULL);
}

static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
                                                        void *fs_data)
{
        return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
                                F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
                                ctx, len, fs_data, XATTR_CREATE);
}

static unsigned f2fs_max_namelen(struct inode *inode)
{
        return S_ISLNK(inode->i_mode) ?
                        inode->i_sb->s_blocksize : F2FS_NAME_LEN;
}

static const struct fscrypt_operations f2fs_cryptops = {
        .key_prefix     = "f2fs:",
        .get_context    = f2fs_get_context,
        .set_context    = f2fs_set_context,
        .is_encrypted   = f2fs_encrypted_inode,
        .empty_dir      = f2fs_empty_dir,
        .max_namelen    = f2fs_max_namelen,
};
#else
static const struct fscrypt_operations f2fs_cryptops = {
        .is_encrypted   = f2fs_encrypted_inode,
};
#endif

static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
                u64 ino, u32 generation)
{
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
        struct inode *inode;

        if (check_nid_range(sbi, ino))
                return ERR_PTR(-ESTALE);

        /*
         * f2fs_iget isn't quite right if the inode is currently unallocated!
         * However f2fs_iget currently does appropriate checks to handle stale
         * inodes so everything is OK.
         */
        inode = f2fs_iget(sb, ino);
        if (IS_ERR(inode))
                return ERR_CAST(inode);
        if (unlikely(generation && inode->i_generation != generation)) {
                /* we didn't find the right inode.. */
                iput(inode);
                return ERR_PTR(-ESTALE);
        }
        return inode;
}

static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
                int fh_len, int fh_type)
{
        return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
                                    f2fs_nfs_get_inode);
}

static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid,
                int fh_len, int fh_type)
{
        return generic_fh_to_parent(sb, fid, fh_len, fh_type,
                                    f2fs_nfs_get_inode);
}

static const struct export_operations f2fs_export_ops = {
        .fh_to_dentry = f2fs_fh_to_dentry,
        .fh_to_parent = f2fs_fh_to_parent,
        .get_parent = f2fs_get_parent,
};

static loff_t max_file_blocks(void)
{
        loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS);
        loff_t leaf_count = ADDRS_PER_BLOCK;

        /* two direct node blocks */
        result += (leaf_count * 2);

        /* two indirect node blocks */
        leaf_count *= NIDS_PER_BLOCK;
        result += (leaf_count * 2);

        /* one double indirect node block */
        leaf_count *= NIDS_PER_BLOCK;
        result += leaf_count;

        return result;
}
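
/*
 * Worked example (assuming the usual 4KB-block constants of this era:
 * DEF_ADDRS_PER_INODE = 923, F2FS_INLINE_XATTR_ADDRS = 50 and
 * ADDRS_PER_BLOCK = NIDS_PER_BLOCK = 1018):
 *
 *      inode direct pointers:  923 - 50            =           873
 *      two direct nodes:       2 * 1018            =         2,036
 *      two indirect nodes:     2 * 1018 * 1018     =     2,072,648
 *      one double indirect:    1018 * 1018 * 1018  = 1,054,977,832
 *
 * for about 1.057 billion addressable 4KB blocks, i.e. a maximum file
 * size of roughly 3.94TB.
 */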

static int __f2fs_commit_super(struct buffer_head *bh,
                        struct f2fs_super_block *super)
{
        lock_buffer(bh);
        if (super)
                memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
        set_buffer_uptodate(bh);
        set_buffer_dirty(bh);
        unlock_buffer(bh);

        /* it's a rare case, so we can do FUA all the time */
        return __sync_dirty_buffer(bh, REQ_PREFLUSH | REQ_FUA);
}

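/*
 * On-disk layout validated below (from the f2fs design):
 *
 *      +----+----+-----+-----+-----+-----------+
 *      | SB | CP | SIT | NAT | SSA | MAIN AREA |
 *      +----+----+-----+-----+-----+-----------+
 *
 * Each metadata area must begin exactly where the previous one ends
 * (start + (segment_count_x << log_blocks_per_seg)), and MAIN must not
 * run past the device's last segment.
 */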
1313 static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
1314                                         struct buffer_head *bh)
1315 {
1316         struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
1317                                         (bh->b_data + F2FS_SUPER_OFFSET);
1318         struct super_block *sb = sbi->sb;
1319         u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
1320         u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
1321         u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr);
1322         u32 nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr);
1323         u32 ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1324         u32 main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
1325         u32 segment_count_ckpt = le32_to_cpu(raw_super->segment_count_ckpt);
1326         u32 segment_count_sit = le32_to_cpu(raw_super->segment_count_sit);
1327         u32 segment_count_nat = le32_to_cpu(raw_super->segment_count_nat);
1328         u32 segment_count_ssa = le32_to_cpu(raw_super->segment_count_ssa);
1329         u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main);
1330         u32 segment_count = le32_to_cpu(raw_super->segment_count);
1331         u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
1332         u64 main_end_blkaddr = main_blkaddr +
1333                                 (segment_count_main << log_blocks_per_seg);
1334         u64 seg_end_blkaddr = segment0_blkaddr +
1335                                 (segment_count << log_blocks_per_seg);
1336
1337         if (segment0_blkaddr != cp_blkaddr) {
1338                 f2fs_msg(sb, KERN_INFO,
1339                         "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
1340                         segment0_blkaddr, cp_blkaddr);
1341                 return true;
1342         }
1343
1344         if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
1345                                                         sit_blkaddr) {
1346                 f2fs_msg(sb, KERN_INFO,
1347                         "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
1348                         cp_blkaddr, sit_blkaddr,
1349                         segment_count_ckpt << log_blocks_per_seg);
1350                 return true;
1351         }
1352
1353         if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
1354                                                         nat_blkaddr) {
1355                 f2fs_msg(sb, KERN_INFO,
1356                         "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
1357                         sit_blkaddr, nat_blkaddr,
1358                         segment_count_sit << log_blocks_per_seg);
1359                 return true;
1360         }
1361
1362         if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
1363                                                         ssa_blkaddr) {
1364                 f2fs_msg(sb, KERN_INFO,
1365                         "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
1366                         nat_blkaddr, ssa_blkaddr,
1367                         segment_count_nat << log_blocks_per_seg);
1368                 return true;
1369         }
1370
1371         if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
1372                                                         main_blkaddr) {
1373                 f2fs_msg(sb, KERN_INFO,
1374                         "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
1375                         ssa_blkaddr, main_blkaddr,
1376                         segment_count_ssa << log_blocks_per_seg);
1377                 return true;
1378         }
1379
1380         if (main_end_blkaddr > seg_end_blkaddr) {
1381                 f2fs_msg(sb, KERN_INFO,
1382                         "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
1383                         main_blkaddr,
1384                         segment0_blkaddr +
1385                                 (segment_count << log_blocks_per_seg),
1386                         segment_count_main << log_blocks_per_seg);
1387                 return true;
1388         } else if (main_end_blkaddr < seg_end_blkaddr) {
1389                 int err = 0;
1390                 char *res;
1391
1392                 /* fix in-memory information all the time */
1393                 raw_super->segment_count = cpu_to_le32((main_end_blkaddr -
1394                                 segment0_blkaddr) >> log_blocks_per_seg);
1395
1396                 if (f2fs_readonly(sb) || bdev_read_only(sb->s_bdev)) {
1397                         set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
1398                         res = "internally";
1399                 } else {
1400                         err = __f2fs_commit_super(bh, NULL);
1401                         res = err ? "failed" : "done";
1402                 }
1403                 f2fs_msg(sb, KERN_INFO,
1404                         "Fix alignment : %s, start(%u) end(%u) block(%u)",
1405                         res, main_blkaddr,
1406                         segment0_blkaddr +
1407                                 (segment_count << log_blocks_per_seg),
1408                         segment_count_main << log_blocks_per_seg);
1409                 if (err)
1410                         return true;
1411         }
1412         return false;
1413 }
1414
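/*
 * Validate the static fields of the raw super block: magic, 4KB block and
 * page size, 512-block segments, sector size, the reserved node/meta/root
 * inode numbers, and the metadata area boundaries.
 */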
1415 static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
1416                                 struct buffer_head *bh)
1417 {
1418         struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
1419                                         (bh->b_data + F2FS_SUPER_OFFSET);
1420         struct super_block *sb = sbi->sb;
1421         unsigned int blocksize;
1422
1423         if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
1424                 f2fs_msg(sb, KERN_INFO,
1425                         "Magic Mismatch, valid(0x%x) - read(0x%x)",
1426                         F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
1427                 return 1;
1428         }
1429
1430         /* Currently, only a 4KB page cache size is supported */
1431         if (F2FS_BLKSIZE != PAGE_SIZE) {
1432                 f2fs_msg(sb, KERN_INFO,
1433                         "Invalid page_cache_size (%lu), supports only 4KB",
1434                         PAGE_SIZE);
1435                 return 1;
1436         }
1437
1438         /* Currently, only a 4KB block size is supported */
1439         blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
1440         if (blocksize != F2FS_BLKSIZE) {
1441                 f2fs_msg(sb, KERN_INFO,
1442                         "Invalid blocksize (%u), supports only 4KB",
1443                         blocksize);
1444                 return 1;
1445         }
1446
1447         /* check log blocks per segment: must be 9, i.e. 512 blocks (2MB) per segment */
1448         if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
1449                 f2fs_msg(sb, KERN_INFO,
1450                         "Invalid log blocks per segment (%u)",
1451                         le32_to_cpu(raw_super->log_blocks_per_seg));
1452                 return 1;
1453         }
1454
1455         /* Currently, 512/1024/2048/4096-byte sector sizes are supported */
1456         if (le32_to_cpu(raw_super->log_sectorsize) >
1457                                 F2FS_MAX_LOG_SECTOR_SIZE ||
1458                 le32_to_cpu(raw_super->log_sectorsize) <
1459                                 F2FS_MIN_LOG_SECTOR_SIZE) {
1460                 f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)",
1461                         le32_to_cpu(raw_super->log_sectorsize));
1462                 return 1;
1463         }
1464         if (le32_to_cpu(raw_super->log_sectors_per_block) +
1465                 le32_to_cpu(raw_super->log_sectorsize) !=
1466                         F2FS_MAX_LOG_SECTOR_SIZE) {
1467                 f2fs_msg(sb, KERN_INFO,
1468                         "Invalid log sectors per block(%u) log sectorsize(%u)",
1469                         le32_to_cpu(raw_super->log_sectors_per_block),
1470                         le32_to_cpu(raw_super->log_sectorsize));
1471                 return 1;
1472         }
1473
1474         /* check reserved ino info */
1475         if (le32_to_cpu(raw_super->node_ino) != 1 ||
1476                 le32_to_cpu(raw_super->meta_ino) != 2 ||
1477                 le32_to_cpu(raw_super->root_ino) != 3) {
1478                 f2fs_msg(sb, KERN_INFO,
1479                         "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
1480                         le32_to_cpu(raw_super->node_ino),
1481                         le32_to_cpu(raw_super->meta_ino),
1482                         le32_to_cpu(raw_super->root_ino));
1483                 return 1;
1484         }
1485
1486         /* check CP/SIT/NAT/SSA/MAIN area boundaries */
1487         if (sanity_check_area_boundary(sbi, bh))
1488                 return 1;
1489
1490         return 0;
1491 }
1492
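/*
 * Cross-check the checkpoint against the super block: the metadata segments
 * (CP/SIT/NAT/SSA plus reserved) must fit inside the total segment count,
 * and the overprovision/reserved segment counts must be non-zero.
 */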
1493 int sanity_check_ckpt(struct f2fs_sb_info *sbi)
1494 {
1495         unsigned int total, fsmeta;
1496         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1497         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1498         unsigned int ovp_segments, reserved_segments;
1499
1500         total = le32_to_cpu(raw_super->segment_count);
1501         fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
1502         fsmeta += le32_to_cpu(raw_super->segment_count_sit);
1503         fsmeta += le32_to_cpu(raw_super->segment_count_nat);
1504         fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
1505         fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
1506
1507         if (unlikely(fsmeta >= total))
1508                 return 1;
1509
1510         ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
1511         reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
1512
1513         if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
1514                         ovp_segments == 0 || reserved_segments == 0)) {
1515                 f2fs_msg(sbi->sb, KERN_ERR,
1516                         "Wrong layout: check mkfs.f2fs version");
1517                 return 1;
1518         }
1519
1520         if (unlikely(f2fs_cp_error(sbi))) {
1521                 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
1522                 return 1;
1523         }
1524         return 0;
1525 }
1526
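/*
 * Seed the in-memory sb_info from the raw super block: geometry
 * (block/segment/section/zone sizes), reserved inode numbers, GC and
 * checkpoint defaults, plus the per-type page counters and locks.
 */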
1527 static void init_sb_info(struct f2fs_sb_info *sbi)
1528 {
1529         struct f2fs_super_block *raw_super = sbi->raw_super;
1530         int i;
1531
1532         sbi->log_sectors_per_block =
1533                 le32_to_cpu(raw_super->log_sectors_per_block);
1534         sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize);
1535         sbi->blocksize = 1 << sbi->log_blocksize;
1536         sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
1537         sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg;
1538         sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
1539         sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
1540         sbi->total_sections = le32_to_cpu(raw_super->section_count);
1541         sbi->total_node_count =
1542                 (le32_to_cpu(raw_super->segment_count_nat) / 2)
1543                         * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK;
1544         sbi->root_ino_num = le32_to_cpu(raw_super->root_ino);
1545         sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
1546         sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
1547         sbi->cur_victim_sec = NULL_SECNO;
1548         sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
1549
1550         sbi->dir_level = DEF_DIR_LEVEL;
1551         sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
1552         sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
1553         clear_sbi_flag(sbi, SBI_NEED_FSCK);
1554
1555         for (i = 0; i < NR_COUNT_TYPE; i++)
1556                 atomic_set(&sbi->nr_pages[i], 0);
1557
1558         INIT_LIST_HEAD(&sbi->s_list);
1559         mutex_init(&sbi->umount_mutex);
1560         mutex_init(&sbi->wio_mutex[NODE]);
1561         mutex_init(&sbi->wio_mutex[DATA]);
1562         spin_lock_init(&sbi->cp_lock);
1563 }
1564
1565 static int init_percpu_info(struct f2fs_sb_info *sbi)
1566 {
1567         int err;
1568
1569         err = percpu_counter_init(&sbi->alloc_valid_block_count, 0, GFP_KERNEL);
1570         if (err)
1571                 return err;
1572
1573         return percpu_counter_init(&sbi->total_valid_inode_count, 0,
1574                                                                 GFP_KERNEL);
1575 }
1576
1577 #ifdef CONFIG_BLK_DEV_ZONED
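/*
 * Cache one byte of zone type per zone of a zoned block device, so the rest
 * of f2fs can tell conventional zones from sequential-write ones. Zones are
 * queried in batches of F2FS_REPORT_NR_ZONES via blkdev_report_zones().
 */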
1578 static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
1579 {
1580         struct block_device *bdev = FDEV(devi).bdev;
1581         sector_t nr_sectors = bdev->bd_part->nr_sects;
1582         sector_t sector = 0;
1583         struct blk_zone *zones;
1584         unsigned int i, nr_zones;
1585         unsigned int n = 0;
1586         int err = -EIO;
1587
1588         if (!f2fs_sb_mounted_blkzoned(sbi->sb))
1589                 return 0;
1590
1591         if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
1592                                 SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)))
1593                 return -EINVAL;
1594         sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev));
1595         if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
1596                                 __ilog2_u32(sbi->blocks_per_blkz))
1597                 return -EINVAL;
1598         sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
1599         FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
1600                                         sbi->log_blocks_per_blkz;
1601         if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
1602                 FDEV(devi).nr_blkz++;
1603
1604         FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
1605         if (!FDEV(devi).blkz_type)
1606                 return -ENOMEM;
1607
1608 #define F2FS_REPORT_NR_ZONES   4096
1609
1610         zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone),
1611                         GFP_KERNEL);
1612         if (!zones)
1613                 return -ENOMEM;
1614
1615         /* Get the type of each block zone */
1616         while (zones && sector < nr_sectors) {
1617
1618                 nr_zones = F2FS_REPORT_NR_ZONES;
1619                 err = blkdev_report_zones(bdev, sector,
1620                                           zones, &nr_zones,
1621                                           GFP_KERNEL);
1622                 if (err)
1623                         break;
1624                 if (!nr_zones) {
1625                         err = -EIO;
1626                         break;
1627                 }
1628
1629                 for (i = 0; i < nr_zones; i++) {
1630                         FDEV(devi).blkz_type[n] = zones[i].type;
1631                         sector += zones[i].len;
1632                         n++;
1633                 }
1634         }
1635
1636         kfree(zones);
1637
1638         return err;
1639 }
1640 #endif
1641
1642 /*
1643  * Read the f2fs raw super block.
1644  * Since there are two copies of the super block, read both of them
1645  * and use the first valid one. If either copy is broken, pass a
1646  * recovery flag back to the caller.
1647  */
1648 static int read_raw_super_block(struct f2fs_sb_info *sbi,
1649                         struct f2fs_super_block **raw_super,
1650                         int *valid_super_block, int *recovery)
1651 {
1652         struct super_block *sb = sbi->sb;
1653         int block;
1654         struct buffer_head *bh;
1655         struct f2fs_super_block *super;
1656         int err = 0;
1657
1658         super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL);
1659         if (!super)
1660                 return -ENOMEM;
1661
1662         for (block = 0; block < 2; block++) {
1663                 bh = sb_bread(sb, block);
1664                 if (!bh) {
1665                         f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
1666                                 block + 1);
1667                         err = -EIO;
1668                         continue;
1669                 }
1670
1671                 /* sanity checking of raw super */
1672                 if (sanity_check_raw_super(sbi, bh)) {
1673                         f2fs_msg(sb, KERN_ERR,
1674                                 "Can't find valid F2FS filesystem in %dth superblock",
1675                                 block + 1);
1676                         err = -EINVAL;
1677                         brelse(bh);
1678                         continue;
1679                 }
1680
1681                 if (!*raw_super) {
1682                         memcpy(super, bh->b_data + F2FS_SUPER_OFFSET,
1683                                                         sizeof(*super));
1684                         *valid_super_block = block;
1685                         *raw_super = super;
1686                 }
1687                 brelse(bh);
1688         }
1689
1690         /* Failed to read either of the superblocks */
1691         if (err < 0)
1692                 *recovery = 1;
1693
1694         /* No valid superblock */
1695         if (!*raw_super)
1696                 kfree(super);
1697         else
1698                 err = 0;
1699
1700         return err;
1701 }
1702
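/*
 * Persist the in-memory super block. The backup copy is written first, so a
 * failure in the middle still leaves the current valid copy intact; in the
 * recovery path, only the other (broken) copy is rewritten.
 */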
1703 int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
1704 {
1705         struct buffer_head *bh;
1706         int err;
1707
1708         if ((recover && f2fs_readonly(sbi->sb)) ||
1709                                 bdev_read_only(sbi->sb->s_bdev)) {
1710                 set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
1711                 return -EROFS;
1712         }
1713
1714         /* write the backup superblock first */
1715         bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0 : 1);
1716         if (!bh)
1717                 return -EIO;
1718         err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
1719         brelse(bh);
1720
1721         /* in the recovery path, skip writing the valid superblock */
1722         if (recover || err)
1723                 return err;
1724
1725         /* write current valid superblock */
1726         bh = sb_getblk(sbi->sb, sbi->valid_super_block);
1727         if (!bh)
1728                 return -EIO;
1729         err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
1730         brelse(bh);
1731         return err;
1732 }
1733
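/*
 * Build the device list from the super block's device table: a plain
 * single-device mount short-circuits, a single zoned device is tracked as
 * one entry, and multi-device mounts get contiguous block ranges computed
 * from each device's segment count.
 */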
1734 static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
1735 {
1736         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1737         unsigned int max_devices = MAX_DEVICES;
1738         int i;
1739
1740         /* Initialize single device information */
1741         if (!RDEV(0).path[0]) {
1742                 if (!bdev_is_zoned(sbi->sb->s_bdev))
1743                         return 0;
1744                 max_devices = 1;
1745         }
1746
1747         /*
1748          * Initialize multiple devices information, or single
1749          * zoned block device information.
1750          */
1751         sbi->devs = kcalloc(max_devices, sizeof(struct f2fs_dev_info),
1752                                 GFP_KERNEL);
1753         if (!sbi->devs)
1754                 return -ENOMEM;
1755
1756         for (i = 0; i < max_devices; i++) {
1757
1758                 if (i > 0 && !RDEV(i).path[0])
1759                         break;
1760
1761                 if (max_devices == 1) {
1762                         /* Single zoned block device mount */
1763                         FDEV(0).bdev =
1764                                 blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev,
1765                                         sbi->sb->s_mode, sbi->sb->s_type);
1766                 } else {
1767                         /* Multi-device mount */
1768                         memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
1769                         FDEV(i).total_segments =
1770                                 le32_to_cpu(RDEV(i).total_segments);
1771                         if (i == 0) {
1772                                 FDEV(i).start_blk = 0;
1773                                 FDEV(i).end_blk = FDEV(i).start_blk +
1774                                     (FDEV(i).total_segments <<
1775                                     sbi->log_blocks_per_seg) - 1 +
1776                                     le32_to_cpu(raw_super->segment0_blkaddr);
1777                         } else {
1778                                 FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
1779                                 FDEV(i).end_blk = FDEV(i).start_blk +
1780                                         (FDEV(i).total_segments <<
1781                                         sbi->log_blocks_per_seg) - 1;
1782                         }
1783                         FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
1784                                         sbi->sb->s_mode, sbi->sb->s_type);
1785                 }
1786                 if (IS_ERR(FDEV(i).bdev))
1787                         return PTR_ERR(FDEV(i).bdev);
1788
1789                 /* count opened devices so they are released on a later failure */
1790                 sbi->s_ndevs = i + 1;
1791
1792 #ifdef CONFIG_BLK_DEV_ZONED
1793                 if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
1794                                 !f2fs_sb_mounted_blkzoned(sbi->sb)) {
1795                         f2fs_msg(sbi->sb, KERN_ERR,
1796                                 "Zoned block device feature not enabled");
1797                         return -EINVAL;
1798                 }
1799                 if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
1800                         if (init_blkz_info(sbi, i)) {
1801                                 f2fs_msg(sbi->sb, KERN_ERR,
1802                                         "Failed to initialize F2FS blkzone information");
1803                                 return -EINVAL;
1804                         }
1805                         if (max_devices == 1)
1806                                 break;
1807                         f2fs_msg(sbi->sb, KERN_INFO,
1808                                 "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
1809                                 i, FDEV(i).path,
1810                                 FDEV(i).total_segments,
1811                                 FDEV(i).start_blk, FDEV(i).end_blk,
1812                                 bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
1813                                 "Host-aware" : "Host-managed");
1814                         continue;
1815                 }
1816 #endif
1817                 f2fs_msg(sbi->sb, KERN_INFO,
1818                         "Mount Device [%2d]: %20s, %8u, %8x - %8x",
1819                                 i, FDEV(i).path,
1820                                 FDEV(i).total_segments,
1821                                 FDEV(i).start_blk, FDEV(i).end_blk);
1822         }
1823         f2fs_msg(sbi->sb, KERN_INFO,
1824                         "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
1825         return 0;
1826 }
1827
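/*
 * Mount-time setup, in order: read and sanity-check the super block, parse
 * options, initialize sb_info, load the meta inode and a valid checkpoint,
 * scan devices, bring up the segment/node/GC managers, load the node and
 * root inodes, recover orphans and fsynced data, and finally start the
 * background GC thread.
 */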
1828 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1829 {
1830         struct f2fs_sb_info *sbi;
1831         struct f2fs_super_block *raw_super;
1832         struct inode *root;
1833         int err;
1834         bool retry = true, need_fsck = false;
1835         char *options = NULL;
1836         int recovery, i, valid_super_block;
1837         struct curseg_info *seg_i;
1838
1839 try_onemore:
1840         err = -EINVAL;
1841         raw_super = NULL;
1842         valid_super_block = -1;
1843         recovery = 0;
1844
1845         /* allocate memory for f2fs-specific super block info */
1846         sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
1847         if (!sbi)
1848                 return -ENOMEM;
1849
1850         sbi->sb = sb;
1851
1852         /* Load the checksum driver */
1853         sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0);
1854         if (IS_ERR(sbi->s_chksum_driver)) {
1855                 f2fs_msg(sb, KERN_ERR, "Cannot load crc32 driver.");
1856                 err = PTR_ERR(sbi->s_chksum_driver);
1857                 sbi->s_chksum_driver = NULL;
1858                 goto free_sbi;
1859         }
1860
1861         /* set a block size */
1862         if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
1863                 f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
1864                 goto free_sbi;
1865         }
1866
1867         err = read_raw_super_block(sbi, &raw_super, &valid_super_block,
1868                                                                 &recovery);
1869         if (err)
1870                 goto free_sbi;
1871
1872         sb->s_fs_info = sbi;
1873         sbi->raw_super = raw_super;
1874
1875         /*
1876          * The BLKZONED feature indicates that the drive was formatted with
1877          * zone alignment optimization. This is optional for host-aware
1878          * devices, but mandatory for host-managed zoned block devices.
1879          */
1880 #ifndef CONFIG_BLK_DEV_ZONED
1881         if (f2fs_sb_mounted_blkzoned(sb)) {
1882                 f2fs_msg(sb, KERN_ERR,
1883                          "Zoned block device support is not enabled");
1884                 goto free_sb_buf;
1885         }
1886 #endif
1887         default_options(sbi);
1888         /* parse mount options */
1889         options = kstrdup((const char *)data, GFP_KERNEL);
1890         if (data && !options) {
1891                 err = -ENOMEM;
1892                 goto free_sb_buf;
1893         }
1894
1895         err = parse_options(sb, options);
1896         if (err)
1897                 goto free_options;
1898
1899         sbi->max_file_blocks = max_file_blocks();
1900         sb->s_maxbytes = sbi->max_file_blocks <<
1901                                 le32_to_cpu(raw_super->log_blocksize);
1902         sb->s_max_links = F2FS_LINK_MAX;
1903         get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1904
1905         sb->s_op = &f2fs_sops;
1906         sb->s_cop = &f2fs_cryptops;
1907         sb->s_xattr = f2fs_xattr_handlers;
1908         sb->s_export_op = &f2fs_export_ops;
1909         sb->s_magic = F2FS_SUPER_MAGIC;
1910         sb->s_time_gran = 1;
1911         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
1912                 (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
1913         memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
1914
1915         /* init f2fs-specific super block info */
1916         sbi->valid_super_block = valid_super_block;
1917         mutex_init(&sbi->gc_mutex);
1918         mutex_init(&sbi->cp_mutex);
1919         init_rwsem(&sbi->node_write);
1920
1921         /* disallow all the data/node/meta page writes */
1922         set_sbi_flag(sbi, SBI_POR_DOING);
1923         spin_lock_init(&sbi->stat_lock);
1924
1925         init_rwsem(&sbi->read_io.io_rwsem);
1926         sbi->read_io.sbi = sbi;
1927         sbi->read_io.bio = NULL;
1928         for (i = 0; i < NR_PAGE_TYPE; i++) {
1929                 init_rwsem(&sbi->write_io[i].io_rwsem);
1930                 sbi->write_io[i].sbi = sbi;
1931                 sbi->write_io[i].bio = NULL;
1932         }
1933
1934         init_rwsem(&sbi->cp_rwsem);
1935         init_waitqueue_head(&sbi->cp_wait);
1936         init_sb_info(sbi);
1937
1938         err = init_percpu_info(sbi);
1939         if (err)
1940                 goto free_options;
1941
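        /*
         * When the super block specifies an IO size larger than one block
         * (io_size_bits), reserve a small pool of dummy pages; elsewhere
         * these pad write bios up to that granularity.
         */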
1942         if (F2FS_IO_SIZE(sbi) > 1) {
1943                 sbi->write_io_dummy =
1944                         mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
1945                 if (!sbi->write_io_dummy)
1946                         goto free_options;
1947         }
1948
1949         /* get an inode for meta space */
1950         sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
1951         if (IS_ERR(sbi->meta_inode)) {
1952                 f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
1953                 err = PTR_ERR(sbi->meta_inode);
1954                 goto free_io_dummy;
1955         }
1956
1957         err = get_valid_checkpoint(sbi);
1958         if (err) {
1959                 f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
1960                 goto free_meta_inode;
1961         }
1962
1963         /* Initialize device list */
1964         err = f2fs_scan_devices(sbi);
1965         if (err) {
1966                 f2fs_msg(sb, KERN_ERR, "Failed to find devices");
1967                 goto free_devices;
1968         }
1969
1970         sbi->total_valid_node_count =
1971                                 le32_to_cpu(sbi->ckpt->valid_node_count);
1972         percpu_counter_set(&sbi->total_valid_inode_count,
1973                                 le32_to_cpu(sbi->ckpt->valid_inode_count));
1974         sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count);
1975         sbi->total_valid_block_count =
1976                                 le64_to_cpu(sbi->ckpt->valid_block_count);
1977         sbi->last_valid_block_count = sbi->total_valid_block_count;
1978
1979         for (i = 0; i < NR_INODE_TYPE; i++) {
1980                 INIT_LIST_HEAD(&sbi->inode_list[i]);
1981                 spin_lock_init(&sbi->inode_lock[i]);
1982         }
1983
1984         init_extent_cache_info(sbi);
1985
1986         init_ino_entry_info(sbi);
1987
1988         /* setup f2fs internal modules */
1989         err = build_segment_manager(sbi);
1990         if (err) {
1991                 f2fs_msg(sb, KERN_ERR,
1992                         "Failed to initialize F2FS segment manager");
1993                 goto free_sm;
1994         }
1995         err = build_node_manager(sbi);
1996         if (err) {
1997                 f2fs_msg(sb, KERN_ERR,
1998                         "Failed to initialize F2FS node manager");
1999                 goto free_nm;
2000         }
2001
2002         /* For write statistics */
2003         if (sb->s_bdev->bd_part)
2004                 sbi->sectors_written_start =
2005                         (u64)part_stat_read(sb->s_bdev->bd_part, sectors[1]);
2006
2007         /* Read accumulated write IO statistics if they exist */
2008         seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
2009         if (__exist_node_summaries(sbi))
2010                 sbi->kbytes_written =
2011                         le64_to_cpu(seg_i->journal->info.kbytes_written);
2012
2013         build_gc_manager(sbi);
2014
2015         /* get an inode for node space */
2016         sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
2017         if (IS_ERR(sbi->node_inode)) {
2018                 f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
2019                 err = PTR_ERR(sbi->node_inode);
2020                 goto free_nm;
2021         }
2022
2023         f2fs_join_shrinker(sbi);
2024
2025         /* if there are any orphan nodes, free them */
2026         err = recover_orphan_inodes(sbi);
2027         if (err)
2028                 goto free_node_inode;
2029
2030         /* read root inode and dentry */
2031         root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
2032         if (IS_ERR(root)) {
2033                 f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
2034                 err = PTR_ERR(root);
2035                 goto free_node_inode;
2036         }
2037         if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2038                 iput(root);
2039                 err = -EINVAL;
2040                 goto free_node_inode;
2041         }
2042
2043         sb->s_root = d_make_root(root); /* allocate root dentry */
2044         if (!sb->s_root) {
2045                 err = -ENOMEM;
2046                 goto free_root_inode;
2047         }
2048
2049         err = f2fs_build_stats(sbi);
2050         if (err)
2051                 goto free_root_inode;
2052
2053         if (f2fs_proc_root)
2054                 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
2055
2056         if (sbi->s_proc) {
2057                 proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
2058                                  &f2fs_seq_segment_info_fops, sb);
2059                 proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
2060                                  &f2fs_seq_segment_bits_fops, sb);
2061         }
2062
2063         sbi->s_kobj.kset = f2fs_kset;
2064         init_completion(&sbi->s_kobj_unregister);
2065         err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
2066                                                         "%s", sb->s_id);
2067         if (err)
2068                 goto free_proc;
2069
2070         /* recover fsynced data */
2071         if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
2072                 /*
2073                  * The mount should fail when the device is read-only and the
2074                  * previous checkpoint was not completed by a clean shutdown.
2075                  */
2076                 if (bdev_read_only(sb->s_bdev) &&
2077                                 !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
2078                         err = -EROFS;
2079                         goto free_kobj;
2080                 }
2081
2082                 if (need_fsck)
2083                         set_sbi_flag(sbi, SBI_NEED_FSCK);
2084
2085                 if (!retry)
2086                         goto skip_recovery;
2087
2088                 err = recover_fsync_data(sbi, false);
2089                 if (err < 0) {
2090                         need_fsck = true;
2091                         f2fs_msg(sb, KERN_ERR,
2092                                 "Cannot recover all fsync data errno=%d", err);
2093                         goto free_kobj;
2094                 }
2095         } else {
2096                 err = recover_fsync_data(sbi, true);
2097
2098                 if (!f2fs_readonly(sb) && err > 0) {
2099                         err = -EINVAL;
2100                         f2fs_msg(sb, KERN_ERR,
2101                                 "Need to recover fsync data");
2102                         goto free_kobj;
2103                 }
2104         }
2105 skip_recovery:
2106         /* recover_fsync_data() cleared this already */
2107         clear_sbi_flag(sbi, SBI_POR_DOING);
2108
2109         /*
2110          * If the filesystem is not mounted read-only,
2111          * start the GC thread.
2112          */
2113         if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) {
2114                 /* After POR, we can run the background GC thread. */
2115                 err = start_gc_thread(sbi);
2116                 if (err)
2117                         goto free_kobj;
2118         }
2119         kfree(options);
2120
2121         /* recover broken superblock */
2122         if (recovery) {
2123                 err = f2fs_commit_super(sbi, true);
2124                 f2fs_msg(sb, KERN_INFO,
2125                         "Try to recover %dth superblock, ret: %d",
2126                         sbi->valid_super_block ? 1 : 2, err);
2127         }
2128
2129         f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
2130                                 cur_cp_version(F2FS_CKPT(sbi)));
2131         f2fs_update_time(sbi, CP_TIME);
2132         f2fs_update_time(sbi, REQ_TIME);
2133         return 0;
2134
2135 free_kobj:
2136         f2fs_sync_inode_meta(sbi);
2137         kobject_del(&sbi->s_kobj);
2138         kobject_put(&sbi->s_kobj);
2139         wait_for_completion(&sbi->s_kobj_unregister);
2140 free_proc:
2141         if (sbi->s_proc) {
2142                 remove_proc_entry("segment_info", sbi->s_proc);
2143                 remove_proc_entry("segment_bits", sbi->s_proc);
2144                 remove_proc_entry(sb->s_id, f2fs_proc_root);
2145         }
2146         f2fs_destroy_stats(sbi);
2147 free_root_inode:
2148         dput(sb->s_root);
2149         sb->s_root = NULL;
2150 free_node_inode:
2151         truncate_inode_pages_final(NODE_MAPPING(sbi));
2152         mutex_lock(&sbi->umount_mutex);
2153         release_ino_entry(sbi, true);
2154         f2fs_leave_shrinker(sbi);
2155         /*
2156          * Some dirty meta pages can be left behind when recover_orphan_inodes()
2157          * fails with EIO. Then, iput(node_inode) can trigger balance_fs_bg()
2158          * followed by write_checkpoint() through f2fs_write_node_pages(), which
2159          * falls into an infinite loop in sync_meta_pages().
2160          */
2161         truncate_inode_pages_final(META_MAPPING(sbi));
2162         iput(sbi->node_inode);
2163         mutex_unlock(&sbi->umount_mutex);
2164 free_nm:
2165         destroy_node_manager(sbi);
2166 free_sm:
2167         destroy_segment_manager(sbi);
2168 free_devices:
2169         destroy_device_list(sbi);
2170         kfree(sbi->ckpt);
2171 free_meta_inode:
2172         make_bad_inode(sbi->meta_inode);
2173         iput(sbi->meta_inode);
2174 free_io_dummy:
2175         mempool_destroy(sbi->write_io_dummy);
2176 free_options:
2177         destroy_percpu_info(sbi);
2178         kfree(options);
2179 free_sb_buf:
2180         kfree(raw_super);
2181 free_sbi:
2182         if (sbi->s_chksum_driver)
2183                 crypto_free_shash(sbi->s_chksum_driver);
2184         kfree(sbi);
2185
2186         /* give only one more chance */
2187         if (retry) {
2188                 retry = false;
2189                 shrink_dcache_sb(sb);
2190                 goto try_onemore;
2191         }
2192         return err;
2193 }
2194
2195 static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
2196                         const char *dev_name, void *data)
2197 {
2198         return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
2199 }
2200
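/*
 * Flag a fully mounted filesystem (s_root set) as closing before the generic
 * block-super teardown runs, so later teardown can tell a real unmount from
 * a failed mount.
 */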
2201 static void kill_f2fs_super(struct super_block *sb)
2202 {
2203         if (sb->s_root)
2204                 set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
2205         kill_block_super(sb);
2206 }
2207
2208 static struct file_system_type f2fs_fs_type = {
2209         .owner          = THIS_MODULE,
2210         .name           = "f2fs",
2211         .mount          = f2fs_mount,
2212         .kill_sb        = kill_f2fs_super,
2213         .fs_flags       = FS_REQUIRES_DEV,
2214 };
2215 MODULE_ALIAS_FS("f2fs");
2216
2217 static int __init init_inodecache(void)
2218 {
2219         f2fs_inode_cachep = kmem_cache_create("f2fs_inode_cache",
2220                         sizeof(struct f2fs_inode_info), 0,
2221                         SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, NULL);
2222         if (!f2fs_inode_cachep)
2223                 return -ENOMEM;
2224         return 0;
2225 }
2226
2227 static void destroy_inodecache(void)
2228 {
2229         /*
2230          * Make sure all delayed rcu free inodes are flushed before we
2231          * destroy cache.
2232          */
2233         rcu_barrier();
2234         kmem_cache_destroy(f2fs_inode_cachep);
2235 }
2236
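/*
 * Module init: caches and subsystems are brought up in dependency order, and
 * the error path unwinds them in exactly the reverse order, mirroring
 * exit_f2fs_fs().
 */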
2237 static int __init init_f2fs_fs(void)
2238 {
2239         int err;
2240
2241         f2fs_build_trace_ios();
2242
2243         err = init_inodecache();
2244         if (err)
2245                 goto fail;
2246         err = create_node_manager_caches();
2247         if (err)
2248                 goto free_inodecache;
2249         err = create_segment_manager_caches();
2250         if (err)
2251                 goto free_node_manager_caches;
2252         err = create_checkpoint_caches();
2253         if (err)
2254                 goto free_segment_manager_caches;
2255         err = create_extent_cache();
2256         if (err)
2257                 goto free_checkpoint_caches;
2258         f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
2259         if (!f2fs_kset) {
2260                 err = -ENOMEM;
2261                 goto free_extent_cache;
2262         }
2263         err = register_shrinker(&f2fs_shrinker_info);
2264         if (err)
2265                 goto free_kset;
2266
2267         err = register_filesystem(&f2fs_fs_type);
2268         if (err)
2269                 goto free_shrinker;
2270         err = f2fs_create_root_stats();
2271         if (err)
2272                 goto free_filesystem;
2273         f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
2274         return 0;
2275
2276 free_filesystem:
2277         unregister_filesystem(&f2fs_fs_type);
2278 free_shrinker:
2279         unregister_shrinker(&f2fs_shrinker_info);
2280 free_kset:
2281         kset_unregister(f2fs_kset);
2282 free_extent_cache:
2283         destroy_extent_cache();
2284 free_checkpoint_caches:
2285         destroy_checkpoint_caches();
2286 free_segment_manager_caches:
2287         destroy_segment_manager_caches();
2288 free_node_manager_caches:
2289         destroy_node_manager_caches();
2290 free_inodecache:
2291         destroy_inodecache();
2292 fail:
2293         return err;
2294 }
2295
2296 static void __exit exit_f2fs_fs(void)
2297 {
2298         remove_proc_entry("fs/f2fs", NULL);
2299         f2fs_destroy_root_stats();
2300         unregister_filesystem(&f2fs_fs_type);
2301         unregister_shrinker(&f2fs_shrinker_info);
2302         kset_unregister(f2fs_kset);
2303         destroy_extent_cache();
2304         destroy_checkpoint_caches();
2305         destroy_segment_manager_caches();
2306         destroy_node_manager_caches();
2307         destroy_inodecache();
2308         f2fs_destroy_trace_ios();
2309 }
2310
2311 module_init(init_f2fs_fs)
2312 module_exit(exit_f2fs_fs)
2313
2314 MODULE_AUTHOR("Samsung Electronics's Praesto Team");
2315 MODULE_DESCRIPTION("Flash Friendly File System");
2316 MODULE_LICENSE("GPL");
2317