]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - fs/orangefs/pvfs2-bufmap.c
Merge remote-tracking branch 'orangefs/for-next'
[karo-tx-linux.git] / fs / orangefs / pvfs2-bufmap.c
1 /*
2  * (C) 2001 Clemson University and The University of Chicago
3  *
4  * See COPYING in top-level directory.
5  */
6 #include "protocol.h"
7 #include "pvfs2-kernel.h"
8 #include "pvfs2-bufmap.h"
9
/*
 * Wait queue that pvfs_bufmap_initialize() wakes once a new bufmap has been
 * installed in __pvfs2_bufmap.
 */
DECLARE_WAIT_QUEUE_HEAD(pvfs2_bufmap_init_waitq);
11
12 static struct pvfs2_bufmap {
13         atomic_t refcnt;
14
15         int desc_size;
16         int desc_shift;
17         int desc_count;
18         int total_size;
19         int page_count;
20
21         struct page **page_array;
22         struct pvfs_bufmap_desc *desc_array;
23
24         /* array to track usage of buffer descriptors */
25         int *buffer_index_array;
26         spinlock_t buffer_index_lock;
27
28         /* array to track usage of buffer descriptors for readdir */
29         int readdir_index_array[PVFS2_READDIR_DEFAULT_DESC_COUNT];
30         spinlock_t readdir_index_lock;
31 } *__pvfs2_bufmap;
32
/*
 * Held while __pvfs2_bufmap is installed, cleared, or read in order to
 * take a reference on it.
 */
static DEFINE_SPINLOCK(pvfs2_bufmap_lock);
34
35 static void
36 pvfs2_bufmap_unmap(struct pvfs2_bufmap *bufmap)
37 {
38         int i;
39
40         for (i = 0; i < bufmap->page_count; i++)
41                 page_cache_release(bufmap->page_array[i]);
42 }
43
44 static void
45 pvfs2_bufmap_free(struct pvfs2_bufmap *bufmap)
46 {
47         kfree(bufmap->page_array);
48         kfree(bufmap->desc_array);
49         kfree(bufmap->buffer_index_array);
50         kfree(bufmap);
51 }
52
53 struct pvfs2_bufmap *pvfs2_bufmap_ref(void)
54 {
55         struct pvfs2_bufmap *bufmap = NULL;
56
57         spin_lock(&pvfs2_bufmap_lock);
58         if (__pvfs2_bufmap) {
59                 bufmap = __pvfs2_bufmap;
60                 atomic_inc(&bufmap->refcnt);
61         }
62         spin_unlock(&pvfs2_bufmap_lock);
63         return bufmap;
64 }
65
66 void pvfs2_bufmap_unref(struct pvfs2_bufmap *bufmap)
67 {
68         if (atomic_dec_and_lock(&bufmap->refcnt, &pvfs2_bufmap_lock)) {
69                 __pvfs2_bufmap = NULL;
70                 spin_unlock(&pvfs2_bufmap_lock);
71
72                 pvfs2_bufmap_unmap(bufmap);
73                 pvfs2_bufmap_free(bufmap);
74         }
75 }
76
77 inline int pvfs_bufmap_size_query(void)
78 {
79         struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
80         int size = bufmap ? bufmap->desc_size : 0;
81
82         pvfs2_bufmap_unref(bufmap);
83         return size;
84 }
85
86 inline int pvfs_bufmap_shift_query(void)
87 {
88         struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
89         int shift = bufmap ? bufmap->desc_shift : 0;
90
91         pvfs2_bufmap_unref(bufmap);
92         return shift;
93 }
94
/* waiters for a free I/O buffer slot (pvfs_bufmap_get/pvfs_bufmap_put) */
static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq);
/* waiters for a free readdir slot (readdir_index_get/readdir_index_put) */
static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq);
97
98 /*
99  * get_bufmap_init
100  *
101  * If bufmap_init is 1, then the shared memory system, including the
102  * buffer_index_array, is available.  Otherwise, it is not.
103  *
104  * returns the value of bufmap_init
105  */
106 int get_bufmap_init(void)
107 {
108         return __pvfs2_bufmap ? 1 : 0;
109 }
110
111
112 static struct pvfs2_bufmap *
113 pvfs2_bufmap_alloc(struct PVFS_dev_map_desc *user_desc)
114 {
115         struct pvfs2_bufmap *bufmap;
116
117         bufmap = kzalloc(sizeof(*bufmap), GFP_KERNEL);
118         if (!bufmap)
119                 goto out;
120
121         atomic_set(&bufmap->refcnt, 1);
122         bufmap->total_size = user_desc->total_size;
123         bufmap->desc_count = user_desc->count;
124         bufmap->desc_size = user_desc->size;
125         bufmap->desc_shift = ilog2(bufmap->desc_size);
126
127         spin_lock_init(&bufmap->buffer_index_lock);
128         bufmap->buffer_index_array =
129                 kcalloc(bufmap->desc_count, sizeof(int), GFP_KERNEL);
130         if (!bufmap->buffer_index_array) {
131                 gossip_err("pvfs2: could not allocate %d buffer indices\n",
132                                 bufmap->desc_count);
133                 goto out_free_bufmap;
134         }
135         spin_lock_init(&bufmap->readdir_index_lock);
136
137         bufmap->desc_array =
138                 kcalloc(bufmap->desc_count, sizeof(struct pvfs_bufmap_desc),
139                         GFP_KERNEL);
140         if (!bufmap->desc_array) {
141                 gossip_err("pvfs2: could not allocate %d descriptors\n",
142                                 bufmap->desc_count);
143                 goto out_free_index_array;
144         }
145
146         bufmap->page_count = bufmap->total_size / PAGE_SIZE;
147
148         /* allocate storage to track our page mappings */
149         bufmap->page_array =
150                 kcalloc(bufmap->page_count, sizeof(struct page *), GFP_KERNEL);
151         if (!bufmap->page_array)
152                 goto out_free_desc_array;
153
154         return bufmap;
155
156 out_free_desc_array:
157         kfree(bufmap->desc_array);
158 out_free_index_array:
159         kfree(bufmap->buffer_index_array);
160 out_free_bufmap:
161         kfree(bufmap);
162 out:
163         return NULL;
164 }
165
166 static int
167 pvfs2_bufmap_map(struct pvfs2_bufmap *bufmap,
168                 struct PVFS_dev_map_desc *user_desc)
169 {
170         int pages_per_desc = bufmap->desc_size / PAGE_SIZE;
171         int offset = 0, ret, i;
172
173         /* map the pages */
174         down_write(&current->mm->mmap_sem);
175         ret = get_user_pages(current,
176                              current->mm,
177                              (unsigned long)user_desc->ptr,
178                              bufmap->page_count,
179                              1,
180                              0,
181                              bufmap->page_array,
182                              NULL);
183         up_write(&current->mm->mmap_sem);
184
185         if (ret < 0)
186                 return ret;
187
188         if (ret != bufmap->page_count) {
189                 gossip_err("pvfs2 error: asked for %d pages, only got %d.\n",
190                                 bufmap->page_count, ret);
191
192                 for (i = 0; i < ret; i++) {
193                         SetPageError(bufmap->page_array[i]);
194                         page_cache_release(bufmap->page_array[i]);
195                 }
196                 return -ENOMEM;
197         }
198
199         /*
200          * ideally we want to get kernel space pointers for each page, but
201          * we can't kmap that many pages at once if highmem is being used.
202          * so instead, we just kmap/kunmap the page address each time the
203          * kaddr is needed.
204          */
205         for (i = 0; i < bufmap->page_count; i++)
206                 flush_dcache_page(bufmap->page_array[i]);
207
208         /* build a list of available descriptors */
209         for (offset = 0, i = 0; i < bufmap->desc_count; i++) {
210                 bufmap->desc_array[i].page_array = &bufmap->page_array[offset];
211                 bufmap->desc_array[i].array_count = pages_per_desc;
212                 bufmap->desc_array[i].uaddr =
213                     (user_desc->ptr + (i * pages_per_desc * PAGE_SIZE));
214                 offset += pages_per_desc;
215         }
216
217         return 0;
218 }
219
220 /*
221  * pvfs_bufmap_initialize()
222  *
223  * initializes the mapped buffer interface
224  *
225  * returns 0 on success, -errno on failure
226  */
227 int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc)
228 {
229         struct pvfs2_bufmap *bufmap;
230         int ret = -EINVAL;
231
232         gossip_debug(GOSSIP_BUFMAP_DEBUG,
233                      "pvfs_bufmap_initialize: called (ptr ("
234                      "%p) sz (%d) cnt(%d).\n",
235                      user_desc->ptr,
236                      user_desc->size,
237                      user_desc->count);
238
239         /*
240          * sanity check alignment and size of buffer that caller wants to
241          * work with
242          */
243         if (PAGE_ALIGN((unsigned long)user_desc->ptr) !=
244             (unsigned long)user_desc->ptr) {
245                 gossip_err("pvfs2 error: memory alignment (front). %p\n",
246                            user_desc->ptr);
247                 goto out;
248         }
249
250         if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size))
251             != (unsigned long)(user_desc->ptr + user_desc->total_size)) {
252                 gossip_err("pvfs2 error: memory alignment (back).(%p + %d)\n",
253                            user_desc->ptr,
254                            user_desc->total_size);
255                 goto out;
256         }
257
258         if (user_desc->total_size != (user_desc->size * user_desc->count)) {
259                 gossip_err("pvfs2 error: user provided an oddly sized buffer: (%d, %d, %d)\n",
260                            user_desc->total_size,
261                            user_desc->size,
262                            user_desc->count);
263                 goto out;
264         }
265
266         if ((user_desc->size % PAGE_SIZE) != 0) {
267                 gossip_err("pvfs2 error: bufmap size not page size divisible (%d).\n",
268                            user_desc->size);
269                 goto out;
270         }
271
272         ret = -ENOMEM;
273         bufmap = pvfs2_bufmap_alloc(user_desc);
274         if (!bufmap)
275                 goto out;
276
277         ret = pvfs2_bufmap_map(bufmap, user_desc);
278         if (ret)
279                 goto out_free_bufmap;
280
281
282         spin_lock(&pvfs2_bufmap_lock);
283         if (__pvfs2_bufmap) {
284                 spin_unlock(&pvfs2_bufmap_lock);
285                 gossip_err("pvfs2: error: bufmap already initialized.\n");
286                 ret = -EALREADY;
287                 goto out_unmap_bufmap;
288         }
289         __pvfs2_bufmap = bufmap;
290         spin_unlock(&pvfs2_bufmap_lock);
291
292         /*
293          * If there are operations in pvfs2_bufmap_init_waitq, wake them up.
294          * This scenario occurs when the client-core is restarted and I/O
295          * requests in the in-progress or waiting tables are restarted.  I/O
296          * requests cannot be restarted until the shared memory system is
297          * completely re-initialized, so we put the I/O requests in this
298          * waitq until initialization has completed.  NOTE:  the I/O requests
299          * are also on a timer, so they don't wait forever just in case the
300          * client-core doesn't come back up.
301          */
302         wake_up_interruptible(&pvfs2_bufmap_init_waitq);
303
304         gossip_debug(GOSSIP_BUFMAP_DEBUG,
305                      "pvfs_bufmap_initialize: exiting normally\n");
306         return 0;
307
308 out_unmap_bufmap:
309         pvfs2_bufmap_unmap(bufmap);
310 out_free_bufmap:
311         pvfs2_bufmap_free(bufmap);
312 out:
313         return ret;
314 }
315
316 /*
317  * pvfs_bufmap_finalize()
318  *
319  * shuts down the mapped buffer interface and releases any resources
320  * associated with it
321  *
322  * no return value
323  */
324 void pvfs_bufmap_finalize(void)
325 {
326         gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_finalize: called\n");
327         BUG_ON(!__pvfs2_bufmap);
328         pvfs2_bufmap_unref(__pvfs2_bufmap);
329         gossip_debug(GOSSIP_BUFMAP_DEBUG,
330                      "pvfs2_bufmap_finalize: exiting normally\n");
331 }
332
333 struct slot_args {
334         int slot_count;
335         int *slot_array;
336         spinlock_t *slot_lock;
337         wait_queue_head_t *slot_wq;
338 };
339
340 static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index)
341 {
342         int ret = -1;
343         int i = 0;
344         DECLARE_WAITQUEUE(my_wait, current);
345
346
347         add_wait_queue_exclusive(slargs->slot_wq, &my_wait);
348
349         while (1) {
350                 set_current_state(TASK_INTERRUPTIBLE);
351
352                 /*
353                  * check for available desc, slot_lock is the appropriate
354                  * index_lock
355                  */
356                 spin_lock(slargs->slot_lock);
357                 for (i = 0; i < slargs->slot_count; i++)
358                         if (slargs->slot_array[i] == 0) {
359                                 slargs->slot_array[i] = 1;
360                                 *buffer_index = i;
361                                 ret = 0;
362                                 break;
363                         }
364                 spin_unlock(slargs->slot_lock);
365
366                 /* if we acquired a buffer, then break out of while */
367                 if (ret == 0)
368                         break;
369
370                 if (!signal_pending(current)) {
371                         int timeout =
372                             MSECS_TO_JIFFIES(1000 * slot_timeout_secs);
373                         gossip_debug(GOSSIP_BUFMAP_DEBUG,
374                                      "[BUFMAP]: waiting %d "
375                                      "seconds for a slot\n",
376                                      slot_timeout_secs);
377                         if (!schedule_timeout(timeout)) {
378                                 gossip_debug(GOSSIP_BUFMAP_DEBUG,
379                                              "*** wait_for_a_slot timed out\n");
380                                 ret = -ETIMEDOUT;
381                                 break;
382                         }
383                         gossip_debug(GOSSIP_BUFMAP_DEBUG,
384                           "[BUFMAP]: woken up by a slot becoming available.\n");
385                         continue;
386                 }
387
388                 gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2: %s interrupted.\n",
389                              __func__);
390                 ret = -EINTR;
391                 break;
392         }
393
394         set_current_state(TASK_RUNNING);
395         remove_wait_queue(slargs->slot_wq, &my_wait);
396         return ret;
397 }
398
399 static void put_back_slot(struct slot_args *slargs, int buffer_index)
400 {
401         /* slot_lock is the appropriate index_lock */
402         spin_lock(slargs->slot_lock);
403         if (buffer_index < 0 || buffer_index >= slargs->slot_count) {
404                 spin_unlock(slargs->slot_lock);
405                 return;
406         }
407
408         /* put the desc back on the queue */
409         slargs->slot_array[buffer_index] = 0;
410         spin_unlock(slargs->slot_lock);
411
412         /* wake up anyone who may be sleeping on the queue */
413         wake_up_interruptible(slargs->slot_wq);
414 }
415
416 /*
417  * pvfs_bufmap_get()
418  *
419  * gets a free mapped buffer descriptor, will sleep until one becomes
420  * available if necessary
421  *
422  * returns 0 on success, -errno on failure
423  */
424 int pvfs_bufmap_get(struct pvfs2_bufmap **mapp, int *buffer_index)
425 {
426         struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
427         struct slot_args slargs;
428         int ret;
429
430         if (!bufmap) {
431                 gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n");
432                 return -EIO;
433         }
434
435         slargs.slot_count = bufmap->desc_count;
436         slargs.slot_array = bufmap->buffer_index_array;
437         slargs.slot_lock = &bufmap->buffer_index_lock;
438         slargs.slot_wq = &bufmap_waitq;
439         ret = wait_for_a_slot(&slargs, buffer_index);
440         if (ret)
441                 pvfs2_bufmap_unref(bufmap);
442         *mapp = bufmap;
443         return ret;
444 }
445
446 /*
447  * pvfs_bufmap_put()
448  *
449  * returns a mapped buffer descriptor to the collection
450  *
451  * no return value
452  */
453 void pvfs_bufmap_put(struct pvfs2_bufmap *bufmap, int buffer_index)
454 {
455         struct slot_args slargs;
456
457         slargs.slot_count = bufmap->desc_count;
458         slargs.slot_array = bufmap->buffer_index_array;
459         slargs.slot_lock = &bufmap->buffer_index_lock;
460         slargs.slot_wq = &bufmap_waitq;
461         put_back_slot(&slargs, buffer_index);
462         pvfs2_bufmap_unref(bufmap);
463 }
464
465 /*
466  * readdir_index_get()
467  *
468  * gets a free descriptor, will sleep until one becomes
469  * available if necessary.
470  * Although the readdir buffers are not mapped into kernel space
471  * we could do that at a later point of time. Regardless, these
472  * indices are used by the client-core.
473  *
474  * returns 0 on success, -errno on failure
475  */
476 int readdir_index_get(struct pvfs2_bufmap **mapp, int *buffer_index)
477 {
478         struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
479         struct slot_args slargs;
480         int ret;
481
482         if (!bufmap) {
483                 gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n");
484                 return -EIO;
485         }
486
487         slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT;
488         slargs.slot_array = bufmap->readdir_index_array;
489         slargs.slot_lock = &bufmap->readdir_index_lock;
490         slargs.slot_wq = &readdir_waitq;
491         ret = wait_for_a_slot(&slargs, buffer_index);
492         if (ret)
493                 pvfs2_bufmap_unref(bufmap);
494         *mapp = bufmap;
495         return ret;
496 }
497
498 void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index)
499 {
500         struct slot_args slargs;
501
502         slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT;
503         slargs.slot_array = bufmap->readdir_index_array;
504         slargs.slot_lock = &bufmap->readdir_index_lock;
505         slargs.slot_wq = &readdir_waitq;
506         put_back_slot(&slargs, buffer_index);
507         pvfs2_bufmap_unref(bufmap);
508 }
509
510 int pvfs_bufmap_copy_from_iovec(struct pvfs2_bufmap *bufmap,
511                                 struct iov_iter *iter,
512                                 int buffer_index,
513                                 size_t size)
514 {
515         struct pvfs_bufmap_desc *to;
516         struct page *page;
517         size_t copied;
518         int i;
519
520         gossip_debug(GOSSIP_BUFMAP_DEBUG,
521                      "%s: buffer_index:%d: size:%lu:\n",
522                      __func__, buffer_index, size);
523
524         to = &bufmap->desc_array[buffer_index];
525
526         for (i = 0; size; i++) {
527                 page = to->page_array[i];
528                 copied = copy_page_from_iter(page, 0, PAGE_SIZE, iter);
529                 size -= copied;
530                 if ((copied == 0) && (size))
531                         break;
532         }
533
534         return size ? -EFAULT : 0;
535
536 }
537
538 /*
539  * Iterate through the array of pages containing the bytes from
540  * a file being read.
541  *
542  */
543 int pvfs_bufmap_copy_to_iovec(struct pvfs2_bufmap *bufmap,
544                                     struct iov_iter *iter,
545                                     int buffer_index)
546 {
547         struct pvfs_bufmap_desc *from;
548         struct page *page;
549         int i;
550         size_t written;
551
552         gossip_debug(GOSSIP_BUFMAP_DEBUG,
553                      "%s: buffer_index:%d: iov_iter_count(iter):%lu:\n",
554                      __func__, buffer_index, iov_iter_count(iter));
555
556         from = &bufmap->desc_array[buffer_index];
557
558         for (i = 0; iov_iter_count(iter); i++) {
559                 page = from->page_array[i];
560                 written = copy_page_to_iter(page, 0, PAGE_SIZE, iter);
561                 if ((written == 0) && (iov_iter_count(iter)))
562                         break;
563         }
564
565         return iov_iter_count(iter) ? -EFAULT : 0;
566 }