drivers/lightnvm/pblk-gc.c
/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * pblk-gc.c - pblk's garbage collector
 */

#include "pblk.h"
#include <linux/delay.h>

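/* GC moves the valid sectors of a victim line into the write buffer so
 * that the line can be erased and reused. The work is split across three
 * kthreads and two workqueues: the main GC kthread (pblk_gc_ts) selects
 * victim lines, the reader path (pblk_gc_reader_ts plus the reader
 * workqueues) reads the valid sectors, and the writer kthread
 * (pblk_gc_writer_ts) re-injects the read data into the write buffer.
 */
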
static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
{
        kfree(gc_rq->data);
        kfree(gc_rq);
}

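/* Drain the GC write list and submit the buffered sectors to the write
 * cache. Returns 1 if the list was empty so the writer kthread can sleep.
 */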
static int pblk_gc_write(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_gc_rq *gc_rq, *tgc_rq;
        LIST_HEAD(w_list);

        spin_lock(&gc->w_lock);
        if (list_empty(&gc->w_list)) {
                spin_unlock(&gc->w_lock);
                return 1;
        }

        list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
        gc->w_entries = 0;
        spin_unlock(&gc->w_lock);

        list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
                pblk_write_gc_to_cache(pblk, gc_rq->data, gc_rq->lba_list,
                                gc_rq->nr_secs, gc_rq->secs_to_gc,
                                gc_rq->line, PBLK_IOTYPE_GC);

                list_del(&gc_rq->list);
                kref_put(&gc_rq->line->ref, pblk_line_put);
                pblk_gc_free_gc_rq(gc_rq);
        }

        return 0;
}

static void pblk_gc_writer_kick(struct pblk_gc *gc)
{
        wake_up_process(gc->gc_writer_ts);
}

/*
 * Responsible for managing all memory related to a gc request, including
 * freeing it in case of failure.
 */
static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line *line = gc_rq->line;
        void *data;
        unsigned int secs_to_gc;
        int ret = 0;

        data = kmalloc(gc_rq->nr_secs * geo->sec_size, GFP_KERNEL);
        if (!data) {
                ret = -ENOMEM;
                goto out;
        }

        /* Read from GC victim block */
        if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
                                                        &secs_to_gc, line)) {
                ret = -EFAULT;
                goto free_data;
        }

        if (!secs_to_gc)
                goto free_rq;

        gc_rq->data = data;
        gc_rq->secs_to_gc = secs_to_gc;

retry:
        /* Back off until the GC writer drains the list below its queue depth */
        spin_lock(&gc->w_lock);
        if (gc->w_entries >= PBLK_GC_W_QD) {
                spin_unlock(&gc->w_lock);
                pblk_gc_writer_kick(&pblk->gc);
                usleep_range(128, 256);
                goto retry;
        }
        gc->w_entries++;
        list_add_tail(&gc_rq->list, &gc->w_list);
        spin_unlock(&gc->w_lock);

        pblk_gc_writer_kick(&pblk->gc);

        return 0;

free_rq:
        kfree(gc_rq);
free_data:
        kfree(data);
out:
        kref_put(&line->ref, pblk_line_put);
        return ret;
}

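/* Return a line on which GC failed to its corresponding gc group list */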
static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
{
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct list_head *move_list;

        spin_lock(&line->lock);
        WARN_ON(line->state != PBLK_LINESTATE_GC);
        line->state = PBLK_LINESTATE_CLOSED;
        move_list = pblk_line_gc_list(pblk, line);
        spin_unlock(&line->lock);

        if (move_list) {
                spin_lock(&l_mg->gc_lock);
                list_add_tail(&line->list, move_list);
                spin_unlock(&l_mg->gc_lock);
        }
}

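/* Per-request GC work: read the valid sectors backing one gc_rq and hand
 * them over to the GC writer.
 */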
static void pblk_gc_line_ws(struct work_struct *work)
{
        struct pblk_line_ws *line_rq_ws = container_of(work,
                                                struct pblk_line_ws, ws);
        struct pblk *pblk = line_rq_ws->pblk;
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line *line = line_rq_ws->line;
        struct pblk_gc_rq *gc_rq = line_rq_ws->priv;

        up(&gc->gc_sem);

        if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
                pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
                                                line->id,
                                                le32_to_cpu(*line->vsc),
                                                gc_rq->nr_secs);
        }

        mempool_free(line_rq_ws, pblk->line_ws_pool);
}

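/* Per-line GC work: read the line's emeta to recover the lba list for its
 * valid sectors, split those sectors into gc_rqs of at most max_write_pgs
 * entries each, and queue one read work item per gc_rq.
 */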
static void pblk_gc_line_prepare_ws(struct work_struct *work)
{
        struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
                                                                        ws);
        struct pblk *pblk = line_ws->pblk;
        struct pblk_line *line = line_ws->line;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_gc *gc = &pblk->gc;
        struct line_emeta *emeta_buf;
        struct pblk_line_ws *line_rq_ws;
        struct pblk_gc_rq *gc_rq;
        __le64 *lba_list;
        int sec_left, nr_secs, bit;
        int ret;

        emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
                                                                GFP_KERNEL);
        if (!emeta_buf) {
                pr_err("pblk: cannot use GC emeta\n");
                return;
        }

        ret = pblk_line_read_emeta(pblk, line, emeta_buf);
        if (ret) {
                pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
                goto fail_free_emeta;
        }

        /* If the lba list cannot be interpreted, emeta is corrupted. For now,
         * leave the line untouched. TODO: Implement a recovery routine that
         * scans and moves all sectors on the line.
         */
        lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
        if (!lba_list) {
                pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
                goto fail_free_emeta;
        }

        sec_left = pblk_line_vsc(line);
        if (sec_left < 0) {
                pr_err("pblk: corrupted GC line (%d)\n", line->id);
                goto fail_free_emeta;
        }

        bit = -1;
next_rq:
        gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
        if (!gc_rq)
                goto fail_free_emeta;

        nr_secs = 0;
        do {
                bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
                                                                bit + 1);
                if (bit > line->emeta_ssec)
                        break;

                gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
        } while (nr_secs < pblk->max_write_pgs);

        if (unlikely(!nr_secs)) {
                kfree(gc_rq);
                goto out;
        }

        gc_rq->nr_secs = nr_secs;
        gc_rq->line = line;

        line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
        if (!line_rq_ws)
                goto fail_free_gc_rq;

        line_rq_ws->pblk = pblk;
        line_rq_ws->line = line;
        line_rq_ws->priv = gc_rq;

        down(&gc->gc_sem);
        kref_get(&line->ref);

        INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
        queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);

        sec_left -= nr_secs;
        if (sec_left > 0)
                goto next_rq;

out:
        pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
        mempool_free(line_ws, pblk->line_ws_pool);

        kref_put(&line->ref, pblk_line_put);
        atomic_dec(&gc->inflight_gc);

        return;

fail_free_gc_rq:
        kfree(gc_rq);
fail_free_emeta:
        pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
        pblk_put_line_back(pblk, line);
        kref_put(&line->ref, pblk_line_put);
        mempool_free(line_ws, pblk->line_ws_pool);
        atomic_dec(&gc->inflight_gc);

        pr_err("pblk: failed to GC line %d\n", line->id);
}

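/* Kick off GC of a single line by queueing its preparation work */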
static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line_ws *line_ws;

        pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);

        line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
        if (!line_ws)
                return -ENOMEM;

        line_ws->pblk = pblk;
        line_ws->line = line;

        INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
        queue_work(gc->gc_reader_wq, &line_ws->ws);

        return 0;
}

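/* Take the next line off the GC read list and start reclaiming it.
 * Returns 1 if the list was empty so the reader kthread can sleep.
 */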
static int pblk_gc_read(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line *line;

        spin_lock(&gc->r_lock);
        if (list_empty(&gc->r_list)) {
                spin_unlock(&gc->r_lock);
                return 1;
        }

        line = list_first_entry(&gc->r_list, struct pblk_line, list);
        list_del(&line->list);
        spin_unlock(&gc->r_lock);

        pblk_gc_kick(pblk);

        if (pblk_gc_line(pblk, line))
                pr_err("pblk: failed to GC line %d\n", line->id);

        return 0;
}

static void pblk_gc_reader_kick(struct pblk_gc *gc)
{
        wake_up_process(gc->gc_reader_ts);
}

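/* Choose the line with the fewest valid sectors as the next GC victim */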
static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
                                                 struct list_head *group_list)
{
        struct pblk_line *line, *victim;
        int line_vsc, victim_vsc;

        victim = list_first_entry(group_list, struct pblk_line, list);
        list_for_each_entry(line, group_list, list) {
                line_vsc = le32_to_cpu(*line->vsc);
                victim_vsc = le32_to_cpu(*victim->vsc);
                if (line_vsc < victim_vsc)
                        victim = line;
        }

        return victim;
}

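/* GC runs when the number of free blocks falls below the rate-limiter's
 * high threshold (and GC has been activated).
 */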
static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
{
        unsigned int nr_blocks_free, nr_blocks_need;

        nr_blocks_need = pblk_rl_high_thrs(rl);
        nr_blocks_free = pblk_rl_nr_free_blks(rl);

        /* This is not critical, no need to take lock here */
        return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
}

/*
 * Lines with no valid sectors will be returned to the free list immediately. If
 * GC is activated - either because the free block count is under the determined
 * threshold, or because it is being forced from user space - only lines with a
 * high count of invalid sectors will be recycled.
 */
static void pblk_gc_run(struct pblk *pblk)
{
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_gc *gc = &pblk->gc;
        struct pblk_line *line;
        struct list_head *group_list;
        bool run_gc;
        int inflight_gc, gc_group = 0, prev_group = 0;

        /* Lines on the full list hold no valid sectors; dropping the GC
         * reference allows them to return to the free list.
         */
        do {
                spin_lock(&l_mg->gc_lock);
                if (list_empty(&l_mg->gc_full_list)) {
                        spin_unlock(&l_mg->gc_lock);
                        break;
                }

                line = list_first_entry(&l_mg->gc_full_list,
                                                        struct pblk_line, list);

                spin_lock(&line->lock);
                WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
                line->state = PBLK_LINESTATE_GC;
                spin_unlock(&line->lock);

                list_del(&line->list);
                spin_unlock(&l_mg->gc_lock);

                kref_put(&line->ref, pblk_line_put);
        } while (1);

        run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
        if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
                return;

next_gc_group:
        group_list = l_mg->gc_lists[gc_group++];

        do {
                spin_lock(&l_mg->gc_lock);
                if (list_empty(group_list)) {
                        spin_unlock(&l_mg->gc_lock);
                        break;
                }

                line = pblk_gc_get_victim_line(pblk, group_list);

                spin_lock(&line->lock);
                WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
                line->state = PBLK_LINESTATE_GC;
                spin_unlock(&line->lock);

                list_del(&line->list);
                spin_unlock(&l_mg->gc_lock);

                spin_lock(&gc->r_lock);
                list_add_tail(&line->list, &gc->r_list);
                spin_unlock(&gc->r_lock);

                inflight_gc = atomic_inc_return(&gc->inflight_gc);
                pblk_gc_reader_kick(gc);

                prev_group = 1;

                /* No need to queue up more GC lines than we can handle */
                run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
                if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
                        break;
        } while (1);

        if (!prev_group && pblk->rl.rb_state > gc_group &&
                                                gc_group < PBLK_GC_NR_LISTS)
                goto next_gc_group;
}

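/* Wake up all GC kthreads and re-arm the GC timer */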
void pblk_gc_kick(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        wake_up_process(gc->gc_ts);
        pblk_gc_writer_kick(gc);
        pblk_gc_reader_kick(gc);
        mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
}

static void pblk_gc_timer(unsigned long data)
{
        struct pblk *pblk = (struct pblk *)data;

        pblk_gc_kick(pblk);
}

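/* Main GC kthread: select victim lines, then sleep until kicked again.
 * The writer and reader kthreads below follow the same pattern, sleeping
 * only once their respective lists have been drained.
 */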
static int pblk_gc_ts(void *data)
{
        struct pblk *pblk = data;

        while (!kthread_should_stop()) {
                pblk_gc_run(pblk);
                set_current_state(TASK_INTERRUPTIBLE);
                io_schedule();
        }

        return 0;
}

static int pblk_gc_writer_ts(void *data)
{
        struct pblk *pblk = data;

        while (!kthread_should_stop()) {
                if (!pblk_gc_write(pblk))
                        continue;
                set_current_state(TASK_INTERRUPTIBLE);
                io_schedule();
        }

        return 0;
}

static int pblk_gc_reader_ts(void *data)
{
        struct pblk *pblk = data;

        while (!kthread_should_stop()) {
                if (!pblk_gc_read(pblk))
                        continue;
                set_current_state(TASK_INTERRUPTIBLE);
                io_schedule();
        }

        return 0;
}

static void pblk_gc_start(struct pblk *pblk)
{
        pblk->gc.gc_active = 1;
        pr_debug("pblk: gc start\n");
}

void pblk_gc_should_start(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        if (gc->gc_enabled && !gc->gc_active)
                pblk_gc_start(pblk);

        pblk_gc_kick(pblk);
}

/*
 * If flush_wq == 1 then no lock should be held by the caller since
 * flush_workqueue can sleep
 */
static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
{
        pblk->gc.gc_active = 0;
        pr_debug("pblk: gc stop\n");
}

void pblk_gc_should_stop(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        if (gc->gc_active && !gc->gc_forced)
                pblk_gc_stop(pblk, 0);
}

void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
                              int *gc_active)
{
        struct pblk_gc *gc = &pblk->gc;

        spin_lock(&gc->lock);
        *gc_enabled = gc->gc_enabled;
        *gc_active = gc->gc_active;
        spin_unlock(&gc->lock);
}

int pblk_gc_sysfs_force(struct pblk *pblk, int force)
{
        struct pblk_gc *gc = &pblk->gc;

        if (force < 0 || force > 1)
                return -EINVAL;

        spin_lock(&gc->lock);
        gc->gc_forced = force;

        if (force)
                gc->gc_enabled = 1;
        else
                gc->gc_enabled = 0;
        spin_unlock(&gc->lock);

        pblk_gc_should_start(pblk);

        return 0;
}

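/* Set up the GC kthreads, workqueues, timer, locks and queues */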
int pblk_gc_init(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;
        int ret;

        gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
        if (IS_ERR(gc->gc_ts)) {
                pr_err("pblk: could not allocate GC main kthread\n");
                return PTR_ERR(gc->gc_ts);
        }

        gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
                                                        "pblk-gc-writer-ts");
        if (IS_ERR(gc->gc_writer_ts)) {
                pr_err("pblk: could not allocate GC writer kthread\n");
                ret = PTR_ERR(gc->gc_writer_ts);
                goto fail_free_main_kthread;
        }

        gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
                                                        "pblk-gc-reader-ts");
        if (IS_ERR(gc->gc_reader_ts)) {
                pr_err("pblk: could not allocate GC reader kthread\n");
                ret = PTR_ERR(gc->gc_reader_ts);
                goto fail_free_writer_kthread;
        }

        setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
        mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));

        gc->gc_active = 0;
        gc->gc_forced = 0;
        gc->gc_enabled = 1;
        gc->w_entries = 0;
        atomic_set(&gc->inflight_gc, 0);

        /* Workqueue that reads valid sectors from a line and submits them to
         * the GC writer to be recycled.
         */
        gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
                        WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
        if (!gc->gc_line_reader_wq) {
                pr_err("pblk: could not allocate GC line reader workqueue\n");
                ret = -ENOMEM;
                goto fail_free_reader_kthread;
        }

        /* Workqueue that prepares lines for GC */
        gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
                                        WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
        if (!gc->gc_reader_wq) {
                pr_err("pblk: could not allocate GC reader workqueue\n");
                ret = -ENOMEM;
                goto fail_free_reader_line_wq;
        }

        spin_lock_init(&gc->lock);
        spin_lock_init(&gc->w_lock);
        spin_lock_init(&gc->r_lock);

        /* Cap the number of outstanding GC read work items */
        sema_init(&gc->gc_sem, 128);

        INIT_LIST_HEAD(&gc->w_list);
        INIT_LIST_HEAD(&gc->r_list);

        return 0;

fail_free_reader_line_wq:
        destroy_workqueue(gc->gc_line_reader_wq);
fail_free_reader_kthread:
        kthread_stop(gc->gc_reader_ts);
fail_free_writer_kthread:
        kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
        kthread_stop(gc->gc_ts);

        return ret;
}

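/* Tear down GC: drain outstanding work, stop the timer and the kthreads */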
void pblk_gc_exit(struct pblk *pblk)
{
        struct pblk_gc *gc = &pblk->gc;

        flush_workqueue(gc->gc_reader_wq);
        flush_workqueue(gc->gc_line_reader_wq);

        del_timer(&gc->gc_timer);
        pblk_gc_stop(pblk, 1);

        if (gc->gc_ts)
                kthread_stop(gc->gc_ts);

        if (gc->gc_reader_wq)
                destroy_workqueue(gc->gc_reader_wq);

        if (gc->gc_line_reader_wq)
                destroy_workqueue(gc->gc_line_reader_wq);

        if (gc->gc_writer_ts)
                kthread_stop(gc->gc_writer_ts);

        if (gc->gc_reader_ts)
                kthread_stop(gc->gc_reader_ts);
}