]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - block/blk-stat.c
md: raid1: improve write behind
[karo-tx-linux.git] / block / blk-stat.c
1 /*
2  * Block stat tracking code
3  *
4  * Copyright (C) 2016 Jens Axboe
5  */
6 #include <linux/kernel.h>
7 #include <linux/blk-mq.h>
8
9 #include "blk-stat.h"
10 #include "blk-mq.h"
11
12 static void blk_stat_flush_batch(struct blk_rq_stat *stat)
13 {
14         const s32 nr_batch = READ_ONCE(stat->nr_batch);
15         const s32 nr_samples = READ_ONCE(stat->nr_samples);
16
17         if (!nr_batch)
18                 return;
19         if (!nr_samples)
20                 stat->mean = div64_s64(stat->batch, nr_batch);
21         else {
22                 stat->mean = div64_s64((stat->mean * nr_samples) +
23                                         stat->batch,
24                                         nr_batch + nr_samples);
25         }
26
27         stat->nr_samples += nr_batch;
28         stat->nr_batch = stat->batch = 0;
29 }
30
31 static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
32 {
33         if (!src->nr_samples)
34                 return;
35
36         blk_stat_flush_batch(src);
37
38         dst->min = min(dst->min, src->min);
39         dst->max = max(dst->max, src->max);
40
41         if (!dst->nr_samples)
42                 dst->mean = src->mean;
43         else {
44                 dst->mean = div64_s64((src->mean * src->nr_samples) +
45                                         (dst->mean * dst->nr_samples),
46                                         dst->nr_samples + src->nr_samples);
47         }
48         dst->nr_samples += src->nr_samples;
49 }
50
51 static void blk_mq_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
52 {
53         struct blk_mq_hw_ctx *hctx;
54         struct blk_mq_ctx *ctx;
55         uint64_t latest = 0;
56         int i, j, nr;
57
58         blk_stat_init(&dst[BLK_STAT_READ]);
59         blk_stat_init(&dst[BLK_STAT_WRITE]);
60
61         nr = 0;
62         do {
63                 uint64_t newest = 0;
64
65                 queue_for_each_hw_ctx(q, hctx, i) {
66                         hctx_for_each_ctx(hctx, ctx, j) {
67                                 blk_stat_flush_batch(&ctx->stat[BLK_STAT_READ]);
68                                 blk_stat_flush_batch(&ctx->stat[BLK_STAT_WRITE]);
69
70                                 if (!ctx->stat[BLK_STAT_READ].nr_samples &&
71                                     !ctx->stat[BLK_STAT_WRITE].nr_samples)
72                                         continue;
73                                 if (ctx->stat[BLK_STAT_READ].time > newest)
74                                         newest = ctx->stat[BLK_STAT_READ].time;
75                                 if (ctx->stat[BLK_STAT_WRITE].time > newest)
76                                         newest = ctx->stat[BLK_STAT_WRITE].time;
77                         }
78                 }
79
80                 /*
81                  * No samples
82                  */
83                 if (!newest)
84                         break;
85
86                 if (newest > latest)
87                         latest = newest;
88
89                 queue_for_each_hw_ctx(q, hctx, i) {
90                         hctx_for_each_ctx(hctx, ctx, j) {
91                                 if (ctx->stat[BLK_STAT_READ].time == newest) {
92                                         blk_stat_sum(&dst[BLK_STAT_READ],
93                                                      &ctx->stat[BLK_STAT_READ]);
94                                         nr++;
95                                 }
96                                 if (ctx->stat[BLK_STAT_WRITE].time == newest) {
97                                         blk_stat_sum(&dst[BLK_STAT_WRITE],
98                                                      &ctx->stat[BLK_STAT_WRITE]);
99                                         nr++;
100                                 }
101                         }
102                 }
103                 /*
104                  * If we race on finding an entry, just loop back again.
105                  * Should be very rare.
106                  */
107         } while (!nr);
108
109         dst[BLK_STAT_READ].time = dst[BLK_STAT_WRITE].time = latest;
110 }
111
112 void blk_queue_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
113 {
114         if (q->mq_ops)
115                 blk_mq_stat_get(q, dst);
116         else {
117                 blk_stat_flush_batch(&q->rq_stats[BLK_STAT_READ]);
118                 blk_stat_flush_batch(&q->rq_stats[BLK_STAT_WRITE]);
119                 memcpy(&dst[BLK_STAT_READ], &q->rq_stats[BLK_STAT_READ],
120                                 sizeof(struct blk_rq_stat));
121                 memcpy(&dst[BLK_STAT_WRITE], &q->rq_stats[BLK_STAT_WRITE],
122                                 sizeof(struct blk_rq_stat));
123         }
124 }
125
126 void blk_hctx_stat_get(struct blk_mq_hw_ctx *hctx, struct blk_rq_stat *dst)
127 {
128         struct blk_mq_ctx *ctx;
129         unsigned int i, nr;
130
131         nr = 0;
132         do {
133                 uint64_t newest = 0;
134
135                 hctx_for_each_ctx(hctx, ctx, i) {
136                         blk_stat_flush_batch(&ctx->stat[BLK_STAT_READ]);
137                         blk_stat_flush_batch(&ctx->stat[BLK_STAT_WRITE]);
138
139                         if (!ctx->stat[BLK_STAT_READ].nr_samples &&
140                             !ctx->stat[BLK_STAT_WRITE].nr_samples)
141                                 continue;
142
143                         if (ctx->stat[BLK_STAT_READ].time > newest)
144                                 newest = ctx->stat[BLK_STAT_READ].time;
145                         if (ctx->stat[BLK_STAT_WRITE].time > newest)
146                                 newest = ctx->stat[BLK_STAT_WRITE].time;
147                 }
148
149                 if (!newest)
150                         break;
151
152                 hctx_for_each_ctx(hctx, ctx, i) {
153                         if (ctx->stat[BLK_STAT_READ].time == newest) {
154                                 blk_stat_sum(&dst[BLK_STAT_READ],
155                                                 &ctx->stat[BLK_STAT_READ]);
156                                 nr++;
157                         }
158                         if (ctx->stat[BLK_STAT_WRITE].time == newest) {
159                                 blk_stat_sum(&dst[BLK_STAT_WRITE],
160                                                 &ctx->stat[BLK_STAT_WRITE]);
161                                 nr++;
162                         }
163                 }
164                 /*
165                  * If we race on finding an entry, just loop back again.
166                  * Should be very rare, as the window is only updated
167                  * occasionally
168                  */
169         } while (!nr);
170 }
171
172 static void __blk_stat_init(struct blk_rq_stat *stat, s64 time_now)
173 {
174         stat->min = -1ULL;
175         stat->max = stat->nr_samples = stat->mean = 0;
176         stat->batch = stat->nr_batch = 0;
177         stat->time = time_now & BLK_STAT_NSEC_MASK;
178 }
179
180 void blk_stat_init(struct blk_rq_stat *stat)
181 {
182         __blk_stat_init(stat, ktime_to_ns(ktime_get()));
183 }
184
185 static bool __blk_stat_is_current(struct blk_rq_stat *stat, s64 now)
186 {
187         return (now & BLK_STAT_NSEC_MASK) == (stat->time & BLK_STAT_NSEC_MASK);
188 }
189
190 bool blk_stat_is_current(struct blk_rq_stat *stat)
191 {
192         return __blk_stat_is_current(stat, ktime_to_ns(ktime_get()));
193 }
194
195 void blk_stat_add(struct blk_rq_stat *stat, struct request *rq)
196 {
197         s64 now, value;
198
199         now = __blk_stat_time(ktime_to_ns(ktime_get()));
200         if (now < blk_stat_time(&rq->issue_stat))
201                 return;
202
203         if (!__blk_stat_is_current(stat, now))
204                 __blk_stat_init(stat, now);
205
206         value = now - blk_stat_time(&rq->issue_stat);
207         if (value > stat->max)
208                 stat->max = value;
209         if (value < stat->min)
210                 stat->min = value;
211
212         if (stat->batch + value < stat->batch ||
213             stat->nr_batch + 1 == BLK_RQ_STAT_BATCH)
214                 blk_stat_flush_batch(stat);
215
216         stat->batch += value;
217         stat->nr_batch++;
218 }
219
220 void blk_stat_clear(struct request_queue *q)
221 {
222         if (q->mq_ops) {
223                 struct blk_mq_hw_ctx *hctx;
224                 struct blk_mq_ctx *ctx;
225                 int i, j;
226
227                 queue_for_each_hw_ctx(q, hctx, i) {
228                         hctx_for_each_ctx(hctx, ctx, j) {
229                                 blk_stat_init(&ctx->stat[BLK_STAT_READ]);
230                                 blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
231                         }
232                 }
233         } else {
234                 blk_stat_init(&q->rq_stats[BLK_STAT_READ]);
235                 blk_stat_init(&q->rq_stats[BLK_STAT_WRITE]);
236         }
237 }
238
239 void blk_stat_set_issue_time(struct blk_issue_stat *stat)
240 {
241         stat->time = (stat->time & BLK_STAT_MASK) |
242                         (ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK);
243 }
244
245 /*
246  * Enable stat tracking, return whether it was enabled
247  */
248 bool blk_stat_enable(struct request_queue *q)
249 {
250         if (!test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
251                 set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
252                 return false;
253         }
254
255         return true;
256 }