drivers/lightnvm/pblk-gc.c
/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * pblk-gc.c - pblk's garbage collector
 */

#include "pblk.h"
#include <linux/delay.h>

static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
{
        kfree(gc_rq->data);
        kfree(gc_rq->lba_list);
        kfree(gc_rq);
}

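/*
 * Move queued GC requests into the write buffer and drop the per-request
 * line reference once the data has been resubmitted. Returns 1 if there
 * was nothing to write.
 */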
static int pblk_gc_write(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_gc_rq *gc_rq, *tgc_rq;
        LIST_HEAD(w_list);

        spin_lock(&gc->w_lock);
        if (list_empty(&gc->w_list)) {
                spin_unlock(&gc->w_lock);
                return 1;
        }

        list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
                list_move_tail(&gc_rq->list, &w_list);
                gc->w_entries--;
        }
        spin_unlock(&gc->w_lock);

        list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
                pblk_write_gc_to_cache(pblk, gc_rq->data, gc_rq->lba_list,
                                gc_rq->nr_secs, gc_rq->secs_to_gc,
                                gc_rq->line, PBLK_IOTYPE_GC);

                kref_put(&gc_rq->line->ref, pblk_line_put);

                list_del(&gc_rq->list);
                pblk_gc_free_gc_rq(gc_rq);
        }

        return 0;
}

static void pblk_gc_writer_kick(struct pblk_gc *gc)
{
        wake_up_process(gc->gc_writer_ts);
}

/*
 * Responsible for managing all memory related to a gc request, including
 * freeing it on the failure paths.
 */
static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
                                   u64 *lba_list, unsigned int nr_secs)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_gc_rq *gc_rq;
        void *data;
        unsigned int secs_to_gc;
        int ret = NVM_IO_OK;

        data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL);
        if (!data) {
                ret = NVM_IO_ERR;
                goto free_lba_list;
        }

        /* Read from GC victim block */
        if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
                                                        &secs_to_gc, line)) {
                ret = NVM_IO_ERR;
                goto free_data;
        }

        if (!secs_to_gc)
                goto free_data;

        gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
        if (!gc_rq) {
                ret = NVM_IO_ERR;
                goto free_data;
        }

        gc_rq->line = line;
        gc_rq->data = data;
        gc_rq->lba_list = lba_list;
        gc_rq->nr_secs = nr_secs;
        gc_rq->secs_to_gc = secs_to_gc;

        kref_get(&line->ref);

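        /* Throttle: do not queue more than 256 outstanding GC write entries */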
retry:
        spin_lock(&gc->w_lock);
        if (gc->w_entries > 256) {
                spin_unlock(&gc->w_lock);
                usleep_range(256, 1024);
                goto retry;
        }
        gc->w_entries++;
        list_add_tail(&gc_rq->list, &gc->w_list);
        spin_unlock(&gc->w_lock);

        pblk_gc_writer_kick(&pblk->gc);

        return NVM_IO_OK;

free_data:
        kfree(data);
free_lba_list:
        kfree(lba_list);

        return ret;
}

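/*
 * Reclaim of the line was aborted: mark it closed again and put it back
 * on the GC group list it belongs to.
 */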
static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
{
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct list_head *move_list;

        spin_lock(&line->lock);
        WARN_ON(line->state != PBLK_LINESTATE_GC);
        line->state = PBLK_LINESTATE_CLOSED;
        move_list = pblk_line_gc_list(pblk, line);
        spin_unlock(&line->lock);

        if (move_list) {
                spin_lock(&l_mg->gc_lock);
                list_add_tail(&line->list, move_list);
                spin_unlock(&l_mg->gc_lock);
        }
}

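/*
 * GC reader work: walk the victim line's lba list (taken from emeta) in
 * chunks of at most max_write_pgs sectors and hand the still-valid ones
 * over to pblk_gc_move_valid_secs().
 */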
static void pblk_gc_line_ws(struct work_struct *work)
{
        struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
                                                                        ws);
        struct pblk *pblk = line_ws->pblk;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line *line = line_ws->line;
        struct pblk_line_meta *lm = &pblk->lm;
        __le64 *lba_list = line_ws->priv;
        u64 *gc_list;
        int sec_left;
        int nr_ppas, bit;
        int put_line = 1;

        pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);

        spin_lock(&line->lock);
        sec_left = line->vsc;
        if (!sec_left) {
                /* Lines are erased before being used (l_mg->data_/log_next) */
                spin_unlock(&line->lock);
                goto out;
        }
        spin_unlock(&line->lock);

        if (sec_left < 0) {
                pr_err("pblk: corrupted GC line (%d)\n", line->id);
                put_line = 0;
                pblk_put_line_back(pblk, line);
                goto out;
        }

        bit = -1;
next_rq:
        gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
        if (!gc_list) {
                put_line = 0;
                pblk_put_line_back(pblk, line);
                goto out;
        }

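        /*
         * Valid sectors are those whose bit is still clear in the invalid
         * bitmap; stop once the emeta region of the line is reached.
         */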
        nr_ppas = 0;
        do {
                bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
                                                                bit + 1);
                if (bit > line->emeta_ssec)
                        break;

                gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
        } while (nr_ppas < pblk->max_write_pgs);

        if (unlikely(!nr_ppas)) {
                kfree(gc_list);
                goto out;
        }

        if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
                pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
                                                line->id, line->vsc,
                                                nr_ppas, nr_ppas);
                put_line = 0;
                pblk_put_line_back(pblk, line);
                goto out;
        }

        sec_left -= nr_ppas;
        if (sec_left > 0)
                goto next_rq;

out:
        pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
        mempool_free(line_ws, pblk->line_ws_pool);
        atomic_dec(&pblk->gc.inflight_gc);
        if (put_line)
                kref_put(&line->ref, pblk_line_put);
}

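/*
 * Read the line's emeta to recover its lba list and queue the actual
 * reclaim as a work item on the GC reader workqueue.
 */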
static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_line_ws *line_ws;
        __le64 *lba_list;
        int ret;

        line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
        line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
                                                                GFP_KERNEL);
        if (!line->emeta) {
                pr_err("pblk: cannot use GC emeta\n");
                goto fail_free_ws;
        }

        ret = pblk_line_read_emeta(pblk, line);
        if (ret) {
                pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
                goto fail_free_emeta;
        }

        /* If this read fails, it means that emeta is corrupted. For now, leave
         * the line untouched. TODO: Implement a recovery routine that scans and
         * moves all sectors on the line.
         */
        lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
        if (!lba_list) {
                pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
                goto fail_free_emeta;
        }

        line_ws->pblk = pblk;
        line_ws->line = line;
        line_ws->priv = lba_list;

        INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
        queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);

        return 0;

fail_free_emeta:
        pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
fail_free_ws:
        mempool_free(line_ws, pblk->line_ws_pool);
        pblk_put_line_back(pblk, line);

        return 1;
}

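/*
 * Issue GC for every line collected on the list; lines are removed from
 * the list regardless of whether pblk_gc_line() succeeded.
 */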
static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
{
        struct pblk_line *line, *tline;

        list_for_each_entry_safe(line, tline, gc_list, list) {
                if (pblk_gc_line(pblk, line))
                        pr_err("pblk: failed to GC line %d\n", line->id);
                list_del(&line->list);
        }
}

/*
 * Lines with no valid sectors will be returned to the free list immediately. If
 * GC is activated - either because the free block count is under the determined
 * threshold, or because it is being forced from user space - only lines with a
 * high count of invalid sectors will be recycled.
 */
static void pblk_gc_run(struct pblk *pblk)
{
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line *line, *tline;
        unsigned int nr_blocks_free, nr_blocks_need;
        struct list_head *group_list;
        int run_gc, gc_group = 0;
        int prev_gc = 0;
        int inflight_gc = atomic_read(&gc->inflight_gc);
        LIST_HEAD(gc_list);

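        /*
         * Lines on the full list have no valid sectors left; dropping the
         * reference returns them to the free list via pblk_line_put().
         */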
        spin_lock(&l_mg->gc_lock);
        list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
                spin_lock(&line->lock);
                WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
                line->state = PBLK_LINESTATE_GC;
                spin_unlock(&line->lock);

                list_del(&line->list);
                kref_put(&line->ref, pblk_line_put);
        }
        spin_unlock(&l_mg->gc_lock);

        nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl);
        nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl);
        run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);

next_gc_group:
        group_list = l_mg->gc_lists[gc_group++];
        spin_lock(&l_mg->gc_lock);
        while (run_gc && !list_empty(group_list)) {
                /* No need to queue up more GC lines than we can handle */
                if (!run_gc || inflight_gc > gc->gc_jobs_active) {
                        spin_unlock(&l_mg->gc_lock);
                        pblk_gc_lines(pblk, &gc_list);
                        return;
                }

                line = list_first_entry(group_list, struct pblk_line, list);
                nr_blocks_free += atomic_read(&line->blk_in_line);

                spin_lock(&line->lock);
                WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
                line->state = PBLK_LINESTATE_GC;
                list_move_tail(&line->list, &gc_list);
                atomic_inc(&gc->inflight_gc);
                inflight_gc++;
                spin_unlock(&line->lock);

                prev_gc = 1;
                run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
        }
        spin_unlock(&l_mg->gc_lock);

        pblk_gc_lines(pblk, &gc_list);

        if (!prev_gc && pblk->rl.rb_state > gc_group &&
                                                gc_group < PBLK_NR_GC_LISTS)
                goto next_gc_group;
}

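/* Wake up both GC kthreads and re-arm the GC timer. */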
static void pblk_gc_kick(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        wake_up_process(gc->gc_ts);
        pblk_gc_writer_kick(gc);
        mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
}

static void pblk_gc_timer(unsigned long data)
{
        struct pblk *pblk = (struct pblk *)data;

        pblk_gc_kick(pblk);
}

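/* Main GC kthread: select victim lines, then sleep until kicked again. */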
static int pblk_gc_ts(void *data)
{
        struct pblk *pblk = data;

        while (!kthread_should_stop()) {
                pblk_gc_run(pblk);
                set_current_state(TASK_INTERRUPTIBLE);
                io_schedule();
        }

        return 0;
}

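/* GC writer kthread: drain the GC write list; sleep when it is empty. */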
static int pblk_gc_writer_ts(void *data)
{
        struct pblk *pblk = data;

        while (!kthread_should_stop()) {
                if (!pblk_gc_write(pblk))
                        continue;
                set_current_state(TASK_INTERRUPTIBLE);
                io_schedule();
        }

        return 0;
}

static void pblk_gc_start(struct pblk *pblk)
{
        pblk->gc.gc_active = 1;

        pr_debug("pblk: gc start\n");
}

int pblk_gc_status(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;
        int ret;

        spin_lock(&gc->lock);
        ret = gc->gc_active;
        spin_unlock(&gc->lock);

        return ret;
}

static void __pblk_gc_should_start(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        lockdep_assert_held(&gc->lock);

        if (gc->gc_enabled && !gc->gc_active)
                pblk_gc_start(pblk);
}

void pblk_gc_should_start(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        spin_lock(&gc->lock);
        __pblk_gc_should_start(pblk);
        spin_unlock(&gc->lock);
}

/*
 * If flush_wq == 1 then no lock should be held by the caller since
 * flush_workqueue can sleep
 */
static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
{
        spin_lock(&pblk->gc.lock);
        pblk->gc.gc_active = 0;
        spin_unlock(&pblk->gc.lock);

        pr_debug("pblk: gc stop\n");
}

void pblk_gc_should_stop(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        if (gc->gc_active && !gc->gc_forced)
                pblk_gc_stop(pblk, 0);
}

void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
                              int *gc_active)
{
        struct pblk_gc *gc = &pblk->gc;

        spin_lock(&gc->lock);
        *gc_enabled = gc->gc_enabled;
        *gc_active = gc->gc_active;
        spin_unlock(&gc->lock);
}

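/*
 * sysfs hook: forcing GC also enables it and sets a rate-limiter
 * reservation (pblk_rl_set_gc_rsc()) before kicking GC off.
 */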
void pblk_gc_sysfs_force(struct pblk *pblk, int force)
{
        struct pblk_gc *gc = &pblk->gc;
        int rsv = 0;

        spin_lock(&gc->lock);
        if (force) {
                gc->gc_enabled = 1;
                rsv = 64;
        }
        pblk_rl_set_gc_rsc(&pblk->rl, rsv);
        gc->gc_forced = force;
        __pblk_gc_should_start(pblk);
        spin_unlock(&gc->lock);
}

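/* Create the GC kthreads and the reader workqueue, and arm the GC timer. */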
int pblk_gc_init(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;
        int ret;

        gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
        if (IS_ERR(gc->gc_ts)) {
                pr_err("pblk: could not allocate GC main kthread\n");
                return PTR_ERR(gc->gc_ts);
        }

        gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
                                                        "pblk-gc-writer-ts");
        if (IS_ERR(gc->gc_writer_ts)) {
                pr_err("pblk: could not allocate GC writer kthread\n");
                ret = PTR_ERR(gc->gc_writer_ts);
                goto fail_free_main_kthread;
        }

        setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
        mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));

        gc->gc_active = 0;
        gc->gc_forced = 0;
        gc->gc_enabled = 1;
        gc->gc_jobs_active = 8;
        gc->w_entries = 0;
        atomic_set(&gc->inflight_gc, 0);

        gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq",
                        WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active);
        if (!gc->gc_reader_wq) {
                pr_err("pblk: could not allocate GC reader workqueue\n");
                ret = -ENOMEM;
                goto fail_free_writer_kthread;
        }

        spin_lock_init(&gc->lock);
        spin_lock_init(&gc->w_lock);
        INIT_LIST_HEAD(&gc->w_list);

        return 0;

fail_free_writer_kthread:
        kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
        kthread_stop(gc->gc_ts);

        return ret;
}

void pblk_gc_exit(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        flush_workqueue(gc->gc_reader_wq);

        del_timer(&gc->gc_timer);
        pblk_gc_stop(pblk, 1);

        if (gc->gc_ts)
                kthread_stop(gc->gc_ts);

        if (pblk->gc.gc_reader_wq)
                destroy_workqueue(pblk->gc.gc_reader_wq);

        if (gc->gc_writer_ts)
                kthread_stop(gc->gc_writer_ts);
}