]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - fs/logfs/segment.c
Merge branch 'pm-acpi' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
[karo-tx-linux.git] / fs / logfs / segment.c
1 /*
2  * fs/logfs/segment.c   - Handling the Object Store
3  *
4  * As should be obvious for Linux kernel code, license is GPLv2
5  *
6  * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
7  *
8  * Object store or ostore makes up the complete device with exception of
9  * the superblock and journal areas.  Apart from its own metadata it stores
10  * three kinds of objects: inodes, dentries and blocks, both data and indirect.
11  */
12 #include "logfs.h"
13 #include <linux/slab.h>
14
15 static int logfs_mark_segment_bad(struct super_block *sb, u32 segno)
16 {
17         struct logfs_super *super = logfs_super(sb);
18         struct btree_head32 *head = &super->s_reserved_segments;
19         int err;
20
21         err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
22         if (err)
23                 return err;
24         logfs_super(sb)->s_bad_segments++;
25         /* FIXME: write to journal */
26         return 0;
27 }
28
29 int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase)
30 {
31         struct logfs_super *super = logfs_super(sb);
32
33         super->s_gec++;
34
35         return super->s_devops->erase(sb, (u64)segno << super->s_segshift,
36                         super->s_segsize, ensure_erase);
37 }
38
39 static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes)
40 {
41         s32 ofs;
42
43         logfs_open_area(area, bytes);
44
45         ofs = area->a_used_bytes;
46         area->a_used_bytes += bytes;
47         BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize);
48
49         return dev_ofs(area->a_sb, area->a_segno, ofs);
50 }
51
52 static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
53                 int use_filler)
54 {
55         struct logfs_super *super = logfs_super(sb);
56         struct address_space *mapping = super->s_mapping_inode->i_mapping;
57         filler_t *filler = super->s_devops->readpage;
58         struct page *page;
59
60         BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS);
61         if (use_filler)
62                 page = read_cache_page(mapping, index, filler, sb);
63         else {
64                 page = find_or_create_page(mapping, index, GFP_NOFS);
65                 unlock_page(page);
66         }
67         return page;
68 }
69
70 int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
71                 int use_filler)
72 {
73         pgoff_t index = ofs >> PAGE_SHIFT;
74         struct page *page;
75         long offset = ofs & (PAGE_SIZE-1);
76         long copylen;
77
78         /* Only logfs_wbuf_recover may use len==0 */
79         BUG_ON(!len && !use_filler);
80         do {
81                 copylen = min((ulong)len, PAGE_SIZE - offset);
82
83                 page = get_mapping_page(area->a_sb, index, use_filler);
84                 if (IS_ERR(page))
85                         return PTR_ERR(page);
86                 BUG_ON(!page); /* FIXME: reserve a pool */
87                 SetPageUptodate(page);
88                 memcpy(page_address(page) + offset, buf, copylen);
89
90                 if (!PagePrivate(page)) {
91                         SetPagePrivate(page);
92                         page_cache_get(page);
93                 }
94                 page_cache_release(page);
95
96                 buf += copylen;
97                 len -= copylen;
98                 offset = 0;
99                 index++;
100         } while (len);
101         return 0;
102 }
103
104 static void pad_partial_page(struct logfs_area *area)
105 {
106         struct super_block *sb = area->a_sb;
107         struct page *page;
108         u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
109         pgoff_t index = ofs >> PAGE_SHIFT;
110         long offset = ofs & (PAGE_SIZE-1);
111         u32 len = PAGE_SIZE - offset;
112
113         if (len % PAGE_SIZE) {
114                 page = get_mapping_page(sb, index, 0);
115                 BUG_ON(!page); /* FIXME: reserve a pool */
116                 memset(page_address(page) + offset, 0xff, len);
117                 if (!PagePrivate(page)) {
118                         SetPagePrivate(page);
119                         page_cache_get(page);
120                 }
121                 page_cache_release(page);
122         }
123 }
124
125 static void pad_full_pages(struct logfs_area *area)
126 {
127         struct super_block *sb = area->a_sb;
128         struct logfs_super *super = logfs_super(sb);
129         u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
130         u32 len = super->s_segsize - area->a_used_bytes;
131         pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT;
132         pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT;
133         struct page *page;
134
135         while (no_indizes) {
136                 page = get_mapping_page(sb, index, 0);
137                 BUG_ON(!page); /* FIXME: reserve a pool */
138                 SetPageUptodate(page);
139                 memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
140                 if (!PagePrivate(page)) {
141                         SetPagePrivate(page);
142                         page_cache_get(page);
143                 }
144                 page_cache_release(page);
145                 index++;
146                 no_indizes--;
147         }
148 }
149
/*
 * bdev_writeseg writes whole pages, so the unused tail of the current page
 * is always memset to avoid leaking stale data.  For the final writeout of
 * a segment (@final != 0) all remaining pages are allocated and memset too.
 */
static void pad_wbuf(struct logfs_area *area, int final)
{
	pad_partial_page(area);
	if (!final)
		return;
	pad_full_pages(area);
}
160
161 /*
162  * We have to be careful with the alias tree.  Since lookup is done by bix,
163  * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with
164  * indirect blocks.  So always use it through accessor functions.
165  */
166 static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix,
167                 level_t level)
168 {
169         struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
170         pgoff_t index = logfs_pack_index(bix, level);
171
172         return btree_lookup128(head, ino, index);
173 }
174
175 static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix,
176                 level_t level, void *val)
177 {
178         struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
179         pgoff_t index = logfs_pack_index(bix, level);
180
181         return btree_insert128(head, ino, index, val, GFP_NOFS);
182 }
183
184 static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
185                 write_alias_t *write_one_alias)
186 {
187         struct object_alias_item *item;
188         int err;
189
190         list_for_each_entry(item, &block->item_list, list) {
191                 err = write_alias_journal(sb, block->ino, block->bix,
192                                 block->level, item->child_no, item->val);
193                 if (err)
194                         return err;
195         }
196         return 0;
197 }
198
199 static struct logfs_block_ops btree_block_ops = {
200         .write_block    = btree_write_block,
201         .free_block     = __free_block,
202         .write_alias    = btree_write_alias,
203 };
204
205 int logfs_load_object_aliases(struct super_block *sb,
206                 struct logfs_obj_alias *oa, int count)
207 {
208         struct logfs_super *super = logfs_super(sb);
209         struct logfs_block *block;
210         struct object_alias_item *item;
211         u64 ino, bix;
212         level_t level;
213         int i, err;
214
215         super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
216         count /= sizeof(*oa);
217         for (i = 0; i < count; i++) {
218                 item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
219                 if (!item)
220                         return -ENOMEM;
221                 memset(item, 0, sizeof(*item));
222
223                 super->s_no_object_aliases++;
224                 item->val = oa[i].val;
225                 item->child_no = be16_to_cpu(oa[i].child_no);
226
227                 ino = be64_to_cpu(oa[i].ino);
228                 bix = be64_to_cpu(oa[i].bix);
229                 level = LEVEL(oa[i].level);
230
231                 log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n",
232                                 ino, bix, level, item->child_no,
233                                 be64_to_cpu(item->val));
234                 block = alias_tree_lookup(sb, ino, bix, level);
235                 if (!block) {
236                         block = __alloc_block(sb, ino, bix, level);
237                         block->ops = &btree_block_ops;
238                         err = alias_tree_insert(sb, ino, bix, level, block);
239                         BUG_ON(err); /* mempool empty */
240                 }
241                 if (test_and_set_bit(item->child_no, block->alias_map)) {
242                         printk(KERN_ERR"LogFS: Alias collision detected\n");
243                         return -EIO;
244                 }
245                 list_move_tail(&block->alias_list, &super->s_object_alias);
246                 list_add(&item->list, &block->item_list);
247         }
248         return 0;
249 }
250
251 static void kill_alias(void *_block, unsigned long ignore0,
252                 u64 ignore1, u64 ignore2, size_t ignore3)
253 {
254         struct logfs_block *block = _block;
255         struct super_block *sb = block->sb;
256         struct logfs_super *super = logfs_super(sb);
257         struct object_alias_item *item;
258
259         while (!list_empty(&block->item_list)) {
260                 item = list_entry(block->item_list.next, typeof(*item), list);
261                 list_del(&item->list);
262                 mempool_free(item, super->s_alias_pool);
263         }
264         block->ops->free_block(sb, block);
265 }
266
267 static int obj_type(struct inode *inode, level_t level)
268 {
269         if (level == 0) {
270                 if (S_ISDIR(inode->i_mode))
271                         return OBJ_DENTRY;
272                 if (inode->i_ino == LOGFS_INO_MASTER)
273                         return OBJ_INODE;
274         }
275         return OBJ_BLOCK;
276 }
277
278 static int obj_len(struct super_block *sb, int obj_type)
279 {
280         switch (obj_type) {
281         case OBJ_DENTRY:
282                 return sizeof(struct logfs_disk_dentry);
283         case OBJ_INODE:
284                 return sizeof(struct logfs_disk_inode);
285         case OBJ_BLOCK:
286                 return sb->s_blocksize;
287         default:
288                 BUG();
289         }
290 }
291
292 static int __logfs_segment_write(struct inode *inode, void *buf,
293                 struct logfs_shadow *shadow, int type, int len, int compr)
294 {
295         struct logfs_area *area;
296         struct super_block *sb = inode->i_sb;
297         s64 ofs;
298         struct logfs_object_header h;
299         int acc_len;
300
301         if (shadow->gc_level == 0)
302                 acc_len = len;
303         else
304                 acc_len = obj_len(sb, type);
305
306         area = get_area(sb, shadow->gc_level);
307         ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE);
308         LOGFS_BUG_ON(ofs <= 0, sb);
309         /*
310          * Order is important.  logfs_get_free_bytes(), by modifying the
311          * segment file, may modify the content of the very page we're about
312          * to write now.  Which is fine, as long as the calculated crc and
313          * written data still match.  So do the modifications _before_
314          * calculating the crc.
315          */
316
317         h.len   = cpu_to_be16(len);
318         h.type  = type;
319         h.compr = compr;
320         h.ino   = cpu_to_be64(inode->i_ino);
321         h.bix   = cpu_to_be64(shadow->bix);
322         h.crc   = logfs_crc32(&h, sizeof(h) - 4, 4);
323         h.data_crc = logfs_crc32(buf, len, 0);
324
325         logfs_buf_write(area, ofs, &h, sizeof(h));
326         logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len);
327
328         shadow->new_ofs = ofs;
329         shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE;
330
331         return 0;
332 }
333
334 static s64 logfs_segment_write_compress(struct inode *inode, void *buf,
335                 struct logfs_shadow *shadow, int type, int len)
336 {
337         struct super_block *sb = inode->i_sb;
338         void *compressor_buf = logfs_super(sb)->s_compressed_je;
339         ssize_t compr_len;
340         int ret;
341
342         mutex_lock(&logfs_super(sb)->s_journal_mutex);
343         compr_len = logfs_compress(buf, compressor_buf, len, len);
344
345         if (compr_len >= 0) {
346                 ret = __logfs_segment_write(inode, compressor_buf, shadow,
347                                 type, compr_len, COMPR_ZLIB);
348         } else {
349                 ret = __logfs_segment_write(inode, buf, shadow, type, len,
350                                 COMPR_NONE);
351         }
352         mutex_unlock(&logfs_super(sb)->s_journal_mutex);
353         return ret;
354 }
355
356 /**
357  * logfs_segment_write - write data block to object store
358  * @inode:              inode containing data
359  *
360  * Returns an errno or zero.
361  */
362 int logfs_segment_write(struct inode *inode, struct page *page,
363                 struct logfs_shadow *shadow)
364 {
365         struct super_block *sb = inode->i_sb;
366         struct logfs_super *super = logfs_super(sb);
367         int do_compress, type, len;
368         int ret;
369         void *buf;
370
371         super->s_flags |= LOGFS_SB_FLAG_DIRTY;
372         BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
373         do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED;
374         if (shadow->gc_level != 0) {
375                 /* temporarily disable compression for indirect blocks */
376                 do_compress = 0;
377         }
378
379         type = obj_type(inode, shrink_level(shadow->gc_level));
380         len = obj_len(sb, type);
381         buf = kmap(page);
382         if (do_compress)
383                 ret = logfs_segment_write_compress(inode, buf, shadow, type,
384                                 len);
385         else
386                 ret = __logfs_segment_write(inode, buf, shadow, type, len,
387                                 COMPR_NONE);
388         kunmap(page);
389
390         log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n",
391                         shadow->ino, shadow->bix, shadow->gc_level,
392                         shadow->old_ofs, shadow->new_ofs,
393                         shadow->old_len, shadow->new_len);
394         /* this BUG_ON did catch a locking bug.  useful */
395         BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1)));
396         return ret;
397 }
398
399 int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf)
400 {
401         pgoff_t index = ofs >> PAGE_SHIFT;
402         struct page *page;
403         long offset = ofs & (PAGE_SIZE-1);
404         long copylen;
405
406         while (len) {
407                 copylen = min((ulong)len, PAGE_SIZE - offset);
408
409                 page = get_mapping_page(sb, index, 1);
410                 if (IS_ERR(page))
411                         return PTR_ERR(page);
412                 memcpy(buf, page_address(page) + offset, copylen);
413                 page_cache_release(page);
414
415                 buf += copylen;
416                 len -= copylen;
417                 offset = 0;
418                 index++;
419         }
420         return 0;
421 }
422
423 /*
424  * The "position" of indirect blocks is ambiguous.  It can be the position
425  * of any data block somewhere behind this indirect block.  So we need to
426  * normalize the positions through logfs_block_mask() before comparing.
427  */
428 static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level)
429 {
430         return  (pos1 & logfs_block_mask(sb, level)) !=
431                 (pos2 & logfs_block_mask(sb, level));
432 }
433
434 #if 0
435 static int read_seg_header(struct super_block *sb, u64 ofs,
436                 struct logfs_segment_header *sh)
437 {
438         __be32 crc;
439         int err;
440
441         err = wbuf_read(sb, ofs, sizeof(*sh), sh);
442         if (err)
443                 return err;
444         crc = logfs_crc32(sh, sizeof(*sh), 4);
445         if (crc != sh->crc) {
446                 printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
447                                 "got %x\n", ofs, be32_to_cpu(sh->crc),
448                                 be32_to_cpu(crc));
449                 return -EIO;
450         }
451         return 0;
452 }
453 #endif
454
455 static int read_obj_header(struct super_block *sb, u64 ofs,
456                 struct logfs_object_header *oh)
457 {
458         __be32 crc;
459         int err;
460
461         err = wbuf_read(sb, ofs, sizeof(*oh), oh);
462         if (err)
463                 return err;
464         crc = logfs_crc32(oh, sizeof(*oh) - 4, 4);
465         if (crc != oh->crc) {
466                 printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
467                                 "got %x\n", ofs, be32_to_cpu(oh->crc),
468                                 be32_to_cpu(crc));
469                 return -EIO;
470         }
471         return 0;
472 }
473
474 static void move_btree_to_page(struct inode *inode, struct page *page,
475                 __be64 *data)
476 {
477         struct super_block *sb = inode->i_sb;
478         struct logfs_super *super = logfs_super(sb);
479         struct btree_head128 *head = &super->s_object_alias_tree;
480         struct logfs_block *block;
481         struct object_alias_item *item, *next;
482
483         if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS))
484                 return;
485
486         block = btree_remove128(head, inode->i_ino, page->index);
487         if (!block)
488                 return;
489
490         log_blockmove("move_btree_to_page(%llx, %llx, %x)\n",
491                         block->ino, block->bix, block->level);
492         list_for_each_entry_safe(item, next, &block->item_list, list) {
493                 data[item->child_no] = item->val;
494                 list_del(&item->list);
495                 mempool_free(item, super->s_alias_pool);
496         }
497         block->page = page;
498
499         if (!PagePrivate(page)) {
500                 SetPagePrivate(page);
501                 page_cache_get(page);
502                 set_page_private(page, (unsigned long) block);
503         }
504         block->ops = &indirect_block_ops;
505         initialize_block_counters(page, block, data, 0);
506 }
507
/*
 * Thin wrapper around find_next_bit().  It exists only to silence a false
 * but annoying gcc warning that made the editor jump into bitops.h on every
 * recompile of this file.
 * TODO: Complain to gcc folks about this and upgrade compiler.
 */
static unsigned long fnb(const unsigned long *addr,
		unsigned long size, unsigned long offset)
{
	unsigned long bit = find_next_bit(addr, size, offset);

	return bit;
}
518
519 void move_page_to_btree(struct page *page)
520 {
521         struct logfs_block *block = logfs_block(page);
522         struct super_block *sb = block->sb;
523         struct logfs_super *super = logfs_super(sb);
524         struct object_alias_item *item;
525         unsigned long pos;
526         __be64 *child;
527         int err;
528
529         if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) {
530                 block->ops->free_block(sb, block);
531                 return;
532         }
533         log_blockmove("move_page_to_btree(%llx, %llx, %x)\n",
534                         block->ino, block->bix, block->level);
535         super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
536
537         for (pos = 0; ; pos++) {
538                 pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos);
539                 if (pos >= LOGFS_BLOCK_FACTOR)
540                         break;
541
542                 item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
543                 BUG_ON(!item); /* mempool empty */
544                 memset(item, 0, sizeof(*item));
545
546                 child = kmap_atomic(page);
547                 item->val = child[pos];
548                 kunmap_atomic(child);
549                 item->child_no = pos;
550                 list_add(&item->list, &block->item_list);
551         }
552         block->page = NULL;
553
554         if (PagePrivate(page)) {
555                 ClearPagePrivate(page);
556                 page_cache_release(page);
557                 set_page_private(page, 0);
558         }
559         block->ops = &btree_block_ops;
560         err = alias_tree_insert(block->sb, block->ino, block->bix, block->level,
561                         block);
562         BUG_ON(err); /* mempool empty */
563         ClearPageUptodate(page);
564 }
565
566 static int __logfs_segment_read(struct inode *inode, void *buf,
567                 u64 ofs, u64 bix, level_t level)
568 {
569         struct super_block *sb = inode->i_sb;
570         void *compressor_buf = logfs_super(sb)->s_compressed_je;
571         struct logfs_object_header oh;
572         __be32 crc;
573         u16 len;
574         int err, block_len;
575
576         block_len = obj_len(sb, obj_type(inode, level));
577         err = read_obj_header(sb, ofs, &oh);
578         if (err)
579                 goto out_err;
580
581         err = -EIO;
582         if (be64_to_cpu(oh.ino) != inode->i_ino
583                         || check_pos(sb, be64_to_cpu(oh.bix), bix, level)) {
584                 printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: "
585                                 "expected (%lx, %llx), got (%llx, %llx)\n",
586                                 ofs, inode->i_ino, bix,
587                                 be64_to_cpu(oh.ino), be64_to_cpu(oh.bix));
588                 goto out_err;
589         }
590
591         len = be16_to_cpu(oh.len);
592
593         switch (oh.compr) {
594         case COMPR_NONE:
595                 err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf);
596                 if (err)
597                         goto out_err;
598                 crc = logfs_crc32(buf, len, 0);
599                 if (crc != oh.data_crc) {
600                         printk(KERN_ERR"LOGFS: uncompressed data crc error at "
601                                         "%llx: expected %x, got %x\n", ofs,
602                                         be32_to_cpu(oh.data_crc),
603                                         be32_to_cpu(crc));
604                         goto out_err;
605                 }
606                 break;
607         case COMPR_ZLIB:
608                 mutex_lock(&logfs_super(sb)->s_journal_mutex);
609                 err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len,
610                                 compressor_buf);
611                 if (err) {
612                         mutex_unlock(&logfs_super(sb)->s_journal_mutex);
613                         goto out_err;
614                 }
615                 crc = logfs_crc32(compressor_buf, len, 0);
616                 if (crc != oh.data_crc) {
617                         printk(KERN_ERR"LOGFS: compressed data crc error at "
618                                         "%llx: expected %x, got %x\n", ofs,
619                                         be32_to_cpu(oh.data_crc),
620                                         be32_to_cpu(crc));
621                         mutex_unlock(&logfs_super(sb)->s_journal_mutex);
622                         goto out_err;
623                 }
624                 err = logfs_uncompress(compressor_buf, buf, len, block_len);
625                 mutex_unlock(&logfs_super(sb)->s_journal_mutex);
626                 if (err) {
627                         printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs);
628                         goto out_err;
629                 }
630                 break;
631         default:
632                 LOGFS_BUG(sb);
633                 err = -EIO;
634                 goto out_err;
635         }
636         return 0;
637
638 out_err:
639         logfs_set_ro(sb);
640         printk(KERN_ERR"LOGFS: device is read-only now\n");
641         LOGFS_BUG(sb);
642         return err;
643 }
644
645 /**
646  * logfs_segment_read - read data block from object store
647  * @inode:              inode containing data
648  * @buf:                data buffer
649  * @ofs:                physical data offset
650  * @bix:                block index
651  * @level:              block level
652  *
653  * Returns 0 on success or a negative errno.
654  */
655 int logfs_segment_read(struct inode *inode, struct page *page,
656                 u64 ofs, u64 bix, level_t level)
657 {
658         int err;
659         void *buf;
660
661         if (PageUptodate(page))
662                 return 0;
663
664         ofs &= ~LOGFS_FULLY_POPULATED;
665
666         buf = kmap(page);
667         err = __logfs_segment_read(inode, buf, ofs, bix, level);
668         if (!err) {
669                 move_btree_to_page(inode, page, buf);
670                 SetPageUptodate(page);
671         }
672         kunmap(page);
673         log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n",
674                         inode->i_ino, bix, level, ofs, err);
675         return err;
676 }
677
678 int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow)
679 {
680         struct super_block *sb = inode->i_sb;
681         struct logfs_super *super = logfs_super(sb);
682         struct logfs_object_header h;
683         u16 len;
684         int err;
685
686         super->s_flags |= LOGFS_SB_FLAG_DIRTY;
687         BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
688         BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED);
689         if (!shadow->old_ofs)
690                 return 0;
691
692         log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n",
693                         shadow->ino, shadow->bix, shadow->gc_level,
694                         shadow->old_ofs, shadow->new_ofs,
695                         shadow->old_len, shadow->new_len);
696         err = read_obj_header(sb, shadow->old_ofs, &h);
697         LOGFS_BUG_ON(err, sb);
698         LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb);
699         LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix),
700                                 shrink_level(shadow->gc_level)), sb);
701
702         if (shadow->gc_level == 0)
703                 len = be16_to_cpu(h.len);
704         else
705                 len = obj_len(sb, h.type);
706         shadow->old_len = len + sizeof(h);
707         return 0;
708 }
709
710 void freeseg(struct super_block *sb, u32 segno)
711 {
712         struct logfs_super *super = logfs_super(sb);
713         struct address_space *mapping = super->s_mapping_inode->i_mapping;
714         struct page *page;
715         u64 ofs, start, end;
716
717         start = dev_ofs(sb, segno, 0);
718         end = dev_ofs(sb, segno + 1, 0);
719         for (ofs = start; ofs < end; ofs += PAGE_SIZE) {
720                 page = find_get_page(mapping, ofs >> PAGE_SHIFT);
721                 if (!page)
722                         continue;
723                 if (PagePrivate(page)) {
724                         ClearPagePrivate(page);
725                         page_cache_release(page);
726                 }
727                 page_cache_release(page);
728         }
729 }
730
731 int logfs_open_area(struct logfs_area *area, size_t bytes)
732 {
733         struct super_block *sb = area->a_sb;
734         struct logfs_super *super = logfs_super(sb);
735         int err, closed = 0;
736
737         if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize)
738                 return 0;
739
740         if (area->a_is_open) {
741                 u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
742                 u32 len = super->s_segsize - area->a_written_bytes;
743
744                 log_gc("logfs_close_area(%x)\n", area->a_segno);
745                 pad_wbuf(area, 1);
746                 super->s_devops->writeseg(area->a_sb, ofs, len);
747                 freeseg(sb, area->a_segno);
748                 closed = 1;
749         }
750
751         area->a_used_bytes = 0;
752         area->a_written_bytes = 0;
753 again:
754         area->a_ops->get_free_segment(area);
755         area->a_ops->get_erase_count(area);
756
757         log_gc("logfs_open_area(%x, %x)\n", area->a_segno, area->a_level);
758         err = area->a_ops->erase_segment(area);
759         if (err) {
760                 printk(KERN_WARNING "LogFS: Error erasing segment %x\n",
761                                 area->a_segno);
762                 logfs_mark_segment_bad(sb, area->a_segno);
763                 goto again;
764         }
765         area->a_is_open = 1;
766         return closed;
767 }
768
769 void logfs_sync_area(struct logfs_area *area)
770 {
771         struct super_block *sb = area->a_sb;
772         struct logfs_super *super = logfs_super(sb);
773         u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
774         u32 len = (area->a_used_bytes - area->a_written_bytes);
775
776         if (super->s_writesize)
777                 len &= ~(super->s_writesize - 1);
778         if (len == 0)
779                 return;
780         pad_wbuf(area, 0);
781         super->s_devops->writeseg(sb, ofs, len);
782         area->a_written_bytes += len;
783 }
784
785 void logfs_sync_segments(struct super_block *sb)
786 {
787         struct logfs_super *super = logfs_super(sb);
788         int i;
789
790         for_each_area(i)
791                 logfs_sync_area(super->s_area[i]);
792 }
793
794 /*
795  * Pick a free segment to be used for this area.  Effectively takes a
796  * candidate from the free list (not really a candidate anymore).
797  */
798 static void ostore_get_free_segment(struct logfs_area *area)
799 {
800         struct super_block *sb = area->a_sb;
801         struct logfs_super *super = logfs_super(sb);
802
803         if (super->s_free_list.count == 0) {
804                 printk(KERN_ERR"LOGFS: ran out of free segments\n");
805                 LOGFS_BUG(sb);
806         }
807
808         area->a_segno = get_best_cand(sb, &super->s_free_list, NULL);
809 }
810
811 static void ostore_get_erase_count(struct logfs_area *area)
812 {
813         struct logfs_segment_entry se;
814         u32 ec_level;
815
816         logfs_get_segment_entry(area->a_sb, area->a_segno, &se);
817         BUG_ON(se.ec_level == cpu_to_be32(BADSEG) ||
818                         se.valid == cpu_to_be32(RESERVED));
819
820         ec_level = be32_to_cpu(se.ec_level);
821         area->a_erase_count = (ec_level >> 4) + 1;
822 }
823
824 static int ostore_erase_segment(struct logfs_area *area)
825 {
826         struct super_block *sb = area->a_sb;
827         struct logfs_segment_header sh;
828         u64 ofs;
829         int err;
830
831         err = logfs_erase_segment(sb, area->a_segno, 0);
832         if (err)
833                 return err;
834
835         sh.pad = 0;
836         sh.type = SEG_OSTORE;
837         sh.level = (__force u8)area->a_level;
838         sh.segno = cpu_to_be32(area->a_segno);
839         sh.ec = cpu_to_be32(area->a_erase_count);
840         sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
841         sh.crc = logfs_crc32(&sh, sizeof(sh), 4);
842
843         logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count,
844                         area->a_level);
845
846         ofs = dev_ofs(sb, area->a_segno, 0);
847         area->a_used_bytes = sizeof(sh);
848         logfs_buf_write(area, ofs, &sh, sizeof(sh));
849         return 0;
850 }
851
852 static const struct logfs_area_ops ostore_area_ops = {
853         .get_free_segment       = ostore_get_free_segment,
854         .get_erase_count        = ostore_get_erase_count,
855         .erase_segment          = ostore_erase_segment,
856 };
857
858 static void free_area(struct logfs_area *area)
859 {
860         if (area)
861                 freeseg(area->a_sb, area->a_segno);
862         kfree(area);
863 }
864
865 void free_areas(struct super_block *sb)
866 {
867         struct logfs_super *super = logfs_super(sb);
868         int i;
869
870         for_each_area(i)
871                 free_area(super->s_area[i]);
872         free_area(super->s_journal_area);
873 }
874
875 static struct logfs_area *alloc_area(struct super_block *sb)
876 {
877         struct logfs_area *area;
878
879         area = kzalloc(sizeof(*area), GFP_KERNEL);
880         if (!area)
881                 return NULL;
882
883         area->a_sb = sb;
884         return area;
885 }
886
/* ->invalidatepage for the mapping inode: any call is treated as a bug. */
static void map_invalidatepage(struct page *page, unsigned long l)
{
	BUG();
}
891
892 static int map_releasepage(struct page *page, gfp_t g)
893 {
894         /* Don't release these pages */
895         return 0;
896 }
897
898 static const struct address_space_operations mapping_aops = {
899         .invalidatepage = map_invalidatepage,
900         .releasepage    = map_releasepage,
901         .set_page_dirty = __set_page_dirty_nobuffers,
902 };
903
904 int logfs_init_mapping(struct super_block *sb)
905 {
906         struct logfs_super *super = logfs_super(sb);
907         struct address_space *mapping;
908         struct inode *inode;
909
910         inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING);
911         if (IS_ERR(inode))
912                 return PTR_ERR(inode);
913         super->s_mapping_inode = inode;
914         mapping = inode->i_mapping;
915         mapping->a_ops = &mapping_aops;
916         /* Would it be possible to use __GFP_HIGHMEM as well? */
917         mapping_set_gfp_mask(mapping, GFP_NOFS);
918         return 0;
919 }
920
921 int logfs_init_areas(struct super_block *sb)
922 {
923         struct logfs_super *super = logfs_super(sb);
924         int i = -1;
925
926         super->s_alias_pool = mempool_create_kmalloc_pool(600,
927                         sizeof(struct object_alias_item));
928         if (!super->s_alias_pool)
929                 return -ENOMEM;
930
931         super->s_journal_area = alloc_area(sb);
932         if (!super->s_journal_area)
933                 goto err;
934
935         for_each_area(i) {
936                 super->s_area[i] = alloc_area(sb);
937                 if (!super->s_area[i])
938                         goto err;
939                 super->s_area[i]->a_level = GC_LEVEL(i);
940                 super->s_area[i]->a_ops = &ostore_area_ops;
941         }
942         btree_init_mempool128(&super->s_object_alias_tree,
943                         super->s_btree_pool);
944         return 0;
945
946 err:
947         for (i--; i >= 0; i--)
948                 free_area(super->s_area[i]);
949         free_area(super->s_journal_area);
950         logfs_mempool_destroy(super->s_alias_pool);
951         return -ENOMEM;
952 }
953
954 void logfs_cleanup_areas(struct super_block *sb)
955 {
956         struct logfs_super *super = logfs_super(sb);
957
958         btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias);
959 }