Orangefs: kernel client part 4
fs/orangefs/devpvfs2-req.c
1 /*
2  * (C) 2001 Clemson University and The University of Chicago
3  *
4  * Changes by Acxiom Corporation to add protocol version to kernel
5  * communication, Copyright Acxiom Corporation, 2005.
6  *
7  * See COPYING in top-level directory.
8  */
9
10 #include "protocol.h"
11 #include "pvfs2-kernel.h"
12 #include "pvfs2-dev-proto.h"
13 #include "pvfs2-bufmap.h"
14
15 #include <linux/debugfs.h>
16 #include <linux/slab.h>
17
18 /* this file implements the /dev/pvfs2-req device node */
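/*
 * In broad strokes: the user-space helper (pvfs2-client-core) opens this
 * device, read()s upcalls that the kernel queues on pvfs2_request_list,
 * services them, and writev()s the matching downcalls back, with the
 * 64-bit tag tying each downcall to its upcall.  The ioctl interface
 * handles setup work such as buffer mapping, debug masks and remounts.
 */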
19
20 static int open_access_count;
21
22 #define DUMP_DEVICE_ERROR()                                                   \
23 do {                                                                          \
24         gossip_err("*****************************************************\n");\
25         gossip_err("PVFS2 Device Error:  You cannot open the device file ");  \
26         gossip_err("\n/dev/%s more than once.  Please make sure that\nthere " \
27                    "are no ", PVFS2_REQDEVICE_NAME);                          \
28         gossip_err("instances of a program using this device\ncurrently "     \
29                    "running. (You must verify this!)\n");                     \
30         gossip_err("For example, you can use the lsof program as follows:\n");\
31         gossip_err("'lsof | grep %s' (run this as root)\n",                   \
32                    PVFS2_REQDEVICE_NAME);                                     \
33         gossip_err("  open_access_count = %d\n", open_access_count);          \
34         gossip_err("*****************************************************\n");\
35 } while (0)
36
37 static int hash_func(__u64 tag, int table_size)
38 {
39         return tag % ((unsigned int)table_size);
40 }
41
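/*
 * Ops that have been handed off to user space via read() are parked in
 * htable_ops_in_progress, hashed by tag, so that the downcall written
 * back by client-core can be matched to its op in pvfs2_devreq_remove_op().
 */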
42 static void pvfs2_devreq_add_op(struct pvfs2_kernel_op_s *op)
43 {
44         int index = hash_func(op->tag, hash_table_size);
45
46         spin_lock(&htable_ops_in_progress_lock);
47         list_add_tail(&op->list, &htable_ops_in_progress[index]);
48         spin_unlock(&htable_ops_in_progress_lock);
49 }
50
51 static struct pvfs2_kernel_op_s *pvfs2_devreq_remove_op(__u64 tag)
52 {
53         struct pvfs2_kernel_op_s *op, *next;
54         int index;
55
56         index = hash_func(tag, hash_table_size);
57
58         spin_lock(&htable_ops_in_progress_lock);
59         list_for_each_entry_safe(op,
60                                  next,
61                                  &htable_ops_in_progress[index],
62                                  list) {
63                 if (op->tag == tag) {
64                         list_del(&op->list);
65                         spin_unlock(&htable_ops_in_progress_lock);
66                         return op;
67                 }
68         }
69
70         spin_unlock(&htable_ops_in_progress_lock);
71         return NULL;
72 }
73
74 static int pvfs2_devreq_open(struct inode *inode, struct file *file)
75 {
76         int ret = -EINVAL;
77
78         if (!(file->f_flags & O_NONBLOCK)) {
79                 gossip_err("pvfs2: device cannot be opened in blocking mode\n");
80                 goto out;
81         }
82         ret = -EACCES;
83         gossip_debug(GOSSIP_DEV_DEBUG, "pvfs2-client-core: opening device\n");
84         mutex_lock(&devreq_mutex);
85
86         if (open_access_count == 0) {
87                 ret = generic_file_open(inode, file);
88                 if (ret == 0)
89                         open_access_count++;
90         } else {
91                 DUMP_DEVICE_ERROR();
92         }
93         mutex_unlock(&devreq_mutex);
94
95 out:
96
97         gossip_debug(GOSSIP_DEV_DEBUG,
98                      "pvfs2-client-core: open device complete (ret = %d)\n",
99                      ret);
100         return ret;
101 }
102
103 static ssize_t pvfs2_devreq_read(struct file *file,
104                                  char __user *buf,
105                                  size_t count, loff_t *offset)
106 {
107         int ret = 0;
108         ssize_t len = 0;
109         struct pvfs2_kernel_op_s *cur_op = NULL;
110         static __s32 magic = PVFS2_DEVREQ_MAGIC;
111         __s32 proto_ver = PVFS_KERNEL_PROTO_VERSION;
112
113         if (!(file->f_flags & O_NONBLOCK)) {
114                 /* We do not support blocking reads/opens any more */
115                 gossip_err("pvfs2: blocking reads are not supported! (pvfs2-client-core bug)\n");
116                 return -EINVAL;
117         } else {
118                 struct pvfs2_kernel_op_s *op = NULL, *temp = NULL;
119                 /* get next op (if any) from top of list */
120                 spin_lock(&pvfs2_request_list_lock);
121                 list_for_each_entry_safe(op, temp, &pvfs2_request_list, list) {
122                         __s32 fsid = fsid_of_op(op);
123                         /*
124                          * Check if this op's fsid is known and needs
125                          * remounting
126                          */
127                         if (fsid != PVFS_FS_ID_NULL &&
128                             fs_mount_pending(fsid) == 1) {
129                                 gossip_debug(GOSSIP_DEV_DEBUG,
130                                              "Skipping op tag %llu %s\n",
131                                              llu(op->tag),
132                                              get_opname_string(op));
133                                 continue;
134                         } else {
135                                 /*
136                                  * op does not belong to any particular fsid,
137                                  * or its fsid is already mounted; let it through
138                                  */
139                                 cur_op = op;
140                                 spin_lock(&cur_op->lock);
141                                 list_del(&cur_op->list);
142                                 cur_op->op_linger_tmp--;
143                                 /*
144                                  * if there is a trailer, re-add it to
145                                  * the request list.
146                                  */
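                                /*
                                 * (op_linger == 1 is the common single-stage
                                 * case; op_linger == 2 marks a two-stage
                                 * upcall whose trailer goes out on the
                                 * following read(), with op_linger_tmp
                                 * counting the reads still outstanding.)
                                 */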
147                                 if (cur_op->op_linger == 2 &&
148                                     cur_op->op_linger_tmp == 1) {
149                                         if (cur_op->upcall.trailer_size <= 0 ||
150                                             cur_op->upcall.trailer_buf == NULL)
151                                                 gossip_err("BUG:trailer_size is %ld and trailer buf is %p\n", (long)cur_op->upcall.trailer_size, cur_op->upcall.trailer_buf);
152                                         /* re-add it to the head of the list */
153                                         list_add(&cur_op->list,
154                                                  &pvfs2_request_list);
155                                 }
156                                 spin_unlock(&cur_op->lock);
157                                 break;
158                         }
159                 }
160                 spin_unlock(&pvfs2_request_list_lock);
161         }
162
163         if (cur_op) {
164                 spin_lock(&cur_op->lock);
165
166                 gossip_debug(GOSSIP_DEV_DEBUG,
167                              "client-core: reading op tag %llu %s\n",
168                              llu(cur_op->tag), get_opname_string(cur_op));
169                 if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) {
170                         if (cur_op->op_linger == 1)
171                                 gossip_err("WARNING: Current op already queued...skipping\n");
172                 } else if (cur_op->op_linger == 1 ||
173                            (cur_op->op_linger == 2 &&
174                             cur_op->op_linger_tmp == 0)) {
175                         /*
176                          * atomically move the operation to the
177                          * htable_ops_in_progress
178                          */
179                         set_op_state_inprogress(cur_op);
180                         pvfs2_devreq_add_op(cur_op);
181                 }
182
183                 spin_unlock(&cur_op->lock);
184
185                 /* 2 cases
186                  * a) OPs with no trailers
187                  * b) OPs with trailers, Stage 1
188                  * Either way push the upcall out
189                  */
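                /*
                 * The stage-1 buffer handed to user space is laid out by
                 * the copy_to_user() calls below as:
                 *
                 *   offset  0: __s32 proto_ver  (PVFS_KERNEL_PROTO_VERSION)
                 *   offset  4: __s32 magic      (PVFS2_DEVREQ_MAGIC)
                 *   offset  8: __u64 tag
                 *   offset 16: struct pvfs2_upcall_s
                 *
                 * A user-space reader would consume it along these lines
                 * (illustrative sketch only, not code from this file):
                 *
                 *   char buf[MAX_ALIGNED_DEV_REQ_UPSIZE];
                 *   ssize_t n = read(devfd, buf, sizeof(buf));
                 *   int32_t proto_ver, magic;
                 *   uint64_t tag;
                 *   memcpy(&proto_ver, buf, 4);
                 *   memcpy(&magic, buf + 4, 4);
                 *   memcpy(&tag, buf + 8, 8);
                 *   struct pvfs2_upcall_s *upcall = (void *)(buf + 16);
                 */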
190                 if (cur_op->op_linger == 1 ||
191                    (cur_op->op_linger == 2 && cur_op->op_linger_tmp == 1)) {
192                         len = MAX_ALIGNED_DEV_REQ_UPSIZE;
193                         if ((size_t) len <= count) {
194                             ret = copy_to_user(buf,
195                                                &proto_ver,
196                                                sizeof(__s32));
197                             if (ret == 0) {
198                                 ret = copy_to_user(buf + sizeof(__s32),
199                                                    &magic,
200                                                    sizeof(__s32));
201                                 if (ret == 0) {
202                                     ret = copy_to_user(buf+2 * sizeof(__s32),
203                                                        &cur_op->tag,
204                                                        sizeof(__u64));
205                                     if (ret == 0) {
206                                         ret = copy_to_user(
207                                                 buf +
208                                                   2 *
209                                                   sizeof(__s32) +
210                                                   sizeof(__u64),
211                                                 &cur_op->upcall,
212                                                 sizeof(struct pvfs2_upcall_s));
213                                     }
214                                 }
215                             }
216
217                             if (ret) {
218                                 gossip_err("Failed to copy data to user space\n");
219                                 len = -EFAULT;
220                             }
221                         } else {
222                                 gossip_err
223                                     ("Read buffer is too small to hold the upcall\n");
224                                 len = -EIO;
225                         }
226                 }
227                 /* Stage 2: Push the trailer out */
228                 else if (cur_op->op_linger == 2 && cur_op->op_linger_tmp == 0) {
229                         len = cur_op->upcall.trailer_size;
230                         if ((size_t) len <= count) {
231                                 ret = copy_to_user(buf,
232                                                    cur_op->upcall.trailer_buf,
233                                                    len);
234                                 if (ret) {
235                                         gossip_err("Failed to copy trailer to user space\n");
236                                         len = -EFAULT;
237                                 }
238                         } else {
239                                 gossip_err("Read buffer for trailer is too small (%ld as opposed to %ld)\n",
240                                         (long)count,
241                                         (long)len);
242                                 len = -EIO;
243                         }
244                 } else {
245                         gossip_err("cur_op: %p (op_linger %d), (op_linger_tmp %d), erroneous request list?\n",
246                                 cur_op,
247                                 cur_op->op_linger,
248                                 cur_op->op_linger_tmp);
249                         len = 0;
250                 }
251         } else if (file->f_flags & O_NONBLOCK) {
252                 /*
253                  * if in non-blocking mode, return EAGAIN since no requests are
254                  * ready yet
255                  */
256                 len = -EAGAIN;
257         }
258         return len;
259 }
260
261 /* Function for writev() callers into the device */
262 static ssize_t pvfs2_devreq_writev(struct file *file,
263                                    const struct iovec *iov,
264                                    size_t count,
265                                    loff_t *offset)
266 {
267         struct pvfs2_kernel_op_s *op = NULL;
268         void *buffer = NULL;
269         void *ptr = NULL;
270         unsigned long i = 0;
271         static int max_downsize = MAX_ALIGNED_DEV_REQ_DOWNSIZE;
272         int ret = 0, num_remaining = max_downsize;
273         int notrailer_count = 4; /* num elements in iovec without trailer */
274         int payload_size = 0;
275         __s32 magic = 0;
276         __s32 proto_ver = 0;
277         __u64 tag = 0;
278         ssize_t total_returned_size = 0;
279
280         /* Either there is a trailer or there isn't */
281         if (count != notrailer_count && count != (notrailer_count + 1)) {
282                 gossip_err("Error: Number of iov vectors is (%ld) and notrailer count is %d\n",
283                         count,
284                         notrailer_count);
285                 return -EPROTO;
286         }
287         buffer = dev_req_alloc();
288         if (!buffer)
289                 return -ENOMEM;
290         ptr = buffer;
291
292         for (i = 0; i < notrailer_count; i++) {
293                 if (iov[i].iov_len > num_remaining) {
294                         gossip_err
295                             ("writev error: Freeing buffer and returning\n");
296                         dev_req_release(buffer);
297                         return -EMSGSIZE;
298                 }
299                 ret = copy_from_user(ptr, iov[i].iov_base, iov[i].iov_len);
300                 if (ret) {
301                         gossip_err("Failed to copy data from user space\n");
302                         dev_req_release(buffer);
303                         return -EIO;
304                 }
305                 num_remaining -= iov[i].iov_len;
306                 ptr += iov[i].iov_len;
307                 payload_size += iov[i].iov_len;
308         }
309         total_returned_size = payload_size;
310
311         /* These elements are currently 8 byte aligned (8 bytes for (version +
312          * magic), 8 bytes for tag).  If you add another element, either
313          * make it 8 bytes big, or use get_unaligned when assigning.
314          */
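        /*
         * Parsed layout of the combined payload of those first
         * notrailer_count iovecs (the checks above guarantee it fits in
         * MAX_ALIGNED_DEV_REQ_DOWNSIZE):
         *
         *   __s32 proto_ver | __s32 magic | __u64 tag | struct pvfs2_downcall_s
         *
         * A client-core reply might be assembled roughly like this
         * (illustrative sketch only, not code from this file; the 5th
         * vector is present only when a trailer follows):
         *
         *   struct iovec vec[] = {
         *           { &proto_ver, sizeof(int32_t) },
         *           { &magic,     sizeof(int32_t) },
         *           { &tag,       sizeof(uint64_t) },
         *           { &downcall,  sizeof(downcall) },
         *           { trailer,    trailer_size },
         *   };
         *   writev(devfd, vec, trailer_size ? 5 : 4);
         */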
315         ptr = buffer;
316         proto_ver = *((__s32 *) ptr);
317         ptr += sizeof(__s32);
318
319         magic = *((__s32 *) ptr);
320         ptr += sizeof(__s32);
321
322         tag = *((__u64 *) ptr);
323         ptr += sizeof(__u64);
324
325         if (magic != PVFS2_DEVREQ_MAGIC) {
326                 gossip_err("Error: Device magic number does not match.\n");
327                 dev_req_release(buffer);
328                 return -EPROTO;
329         }
330
331         /*
332          * proto_ver = 20902 for 2.9.2
333          */
334
335         op = pvfs2_devreq_remove_op(tag);
336         if (op) {
337                 /* Increase ref count! */
338                 get_op(op);
339                 /* cut off magic and tag from payload size */
340                 payload_size -= (2 * sizeof(__s32) + sizeof(__u64));
341                 if (payload_size <= sizeof(struct pvfs2_downcall_s))
342                         /* copy the passed in downcall into the op */
343                         memcpy(&op->downcall,
344                                ptr,
345                                sizeof(struct pvfs2_downcall_s));
346                 else
347                         gossip_debug(GOSSIP_DEV_DEBUG,
348                                      "writev: Ignoring %d bytes\n",
349                                      payload_size);
350
351                 /* Do not allocate needlessly if client-core forgets
352                  * to reset trailer size on op errors.
353                  */
354                 if (op->downcall.status == 0 && op->downcall.trailer_size > 0) {
355                         gossip_debug(GOSSIP_DEV_DEBUG,
356                                      "writev: trailer size %ld\n",
357                                      (unsigned long)op->downcall.trailer_size);
358                         if (count != (notrailer_count + 1)) {
359                                 gossip_err("Error: trailer size (%ld) is non-zero, no trailer elements though? (%ld)\n", (unsigned long)op->downcall.trailer_size, count);
360                                 dev_req_release(buffer);
361                                 put_op(op);
362                                 return -EPROTO;
363                         }
364                         if (iov[notrailer_count].iov_len >
365                             op->downcall.trailer_size) {
366                                 gossip_err("writev error: trailer size (%ld) is smaller than iov_len (%ld)\n", (unsigned long)op->downcall.trailer_size, (unsigned long)iov[notrailer_count].iov_len);
367                                 dev_req_release(buffer);
368                                 put_op(op);
369                                 return -EMSGSIZE;
370                         }
371                         /* Allocate a buffer large enough to hold the
372                          * trailer bytes.
373                          */
374                         op->downcall.trailer_buf =
375                             vmalloc(op->downcall.trailer_size);
376                         if (op->downcall.trailer_buf != NULL) {
377                                 gossip_debug(GOSSIP_DEV_DEBUG, "vmalloc: %p\n",
378                                              op->downcall.trailer_buf);
379                                 ret = copy_from_user(op->downcall.trailer_buf,
380                                                      iov[notrailer_count].
381                                                      iov_base,
382                                                      iov[notrailer_count].
383                                                      iov_len);
384                                 if (ret) {
385                                         gossip_err("Failed to copy trailer data from user space\n");
386                                         dev_req_release(buffer);
387                                         gossip_debug(GOSSIP_DEV_DEBUG,
388                                                      "vfree: %p\n",
389                                                      op->downcall.trailer_buf);
390                                         vfree(op->downcall.trailer_buf);
391                                         op->downcall.trailer_buf = NULL;
392                                         put_op(op);
393                                         return -EIO;
394                                 }
395                         } else {
396                                 /* Change downcall status */
397                                 op->downcall.status = -ENOMEM;
398                                 gossip_err("writev: could not vmalloc for trailer!\n");
399                         }
400                 }
401
402                 /* if this operation is an I/O operation and it was
403                  * initiated on behalf of a *synchronous* VFS I/O operation,
404                  * only then do we need to wait
405                  * for all data to be copied before we can return, to avoid
406                  * buffer corruption and races that can pull the buffers
407                  * out from under us.
408                  *
409                  * Essentially we're synchronizing with other parts of the
410                  * vfs implicitly by not allowing the user space
411                  * application reading/writing this device to return until
412                  * the buffers are done being used.
413                  */
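                /*
                 * Concretely, the handshake below is: mark the op serviced,
                 * wake the VFS waiter on op->waitq, then sleep on
                 * op->io_completion_waitq until the VFS side sets
                 * op->io_completed (or we time out / catch a signal).
                 */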
414                 if ((op->upcall.type == PVFS2_VFS_OP_FILE_IO &&
415                      op->upcall.req.io.async_vfs_io == PVFS_VFS_SYNC_IO) ||
416                      op->upcall.type == PVFS2_VFS_OP_FILE_IOX) {
417                         int timed_out = 0;
418                         DECLARE_WAITQUEUE(wait_entry, current);
419
420                         /* tell the vfs op waiting on a waitqueue
421                          * that this op is done
422                          */
423                         spin_lock(&op->lock);
424                         set_op_state_serviced(op);
425                         spin_unlock(&op->lock);
426
427                         add_wait_queue_exclusive(&op->io_completion_waitq,
428                                                  &wait_entry);
429                         wake_up_interruptible(&op->waitq);
430
431                         while (1) {
432                                 set_current_state(TASK_INTERRUPTIBLE);
433
434                                 spin_lock(&op->lock);
435                                 if (op->io_completed) {
436                                         spin_unlock(&op->lock);
437                                         break;
438                                 }
439                                 spin_unlock(&op->lock);
440
441                                 if (!signal_pending(current)) {
442                                         int timeout =
443                                             MSECS_TO_JIFFIES(1000 *
444                                                              op_timeout_secs);
445                                         if (!schedule_timeout(timeout)) {
446                                                 gossip_debug(GOSSIP_DEV_DEBUG, "*** I/O wait time is up\n");
447                                                 timed_out = 1;
448                                                 break;
449                                         }
450                                         continue;
451                                 }
452
453                                 gossip_debug(GOSSIP_DEV_DEBUG, "*** signal on I/O wait -- aborting\n");
454                                 break;
455                         }
456
457                         set_current_state(TASK_RUNNING);
458                         remove_wait_queue(&op->io_completion_waitq,
459                                           &wait_entry);
460
461                         /* NOTE: for I/O operations we handle releasing the op
462                          * object except in the case of timeout.  The reason we
463                          * can't free the op in timeout cases is that the op
464                          * service logic in the vfs retries operations using
465                          * the same op ptr, thus it can't be freed.
466                          */
467                         if (!timed_out)
468                                 op_release(op);
469                 } else {
470
471                         /*
472                          * tell the vfs op waiting on a waitqueue that
473                          * this op is done
474                          */
475                         spin_lock(&op->lock);
476                         set_op_state_serviced(op);
477                         spin_unlock(&op->lock);
478                         /*
479                            for every other operation (i.e. non-I/O), we need to
480                            wake up the callers for downcall completion
481                            notification
482                          */
483                         wake_up_interruptible(&op->waitq);
484                 }
485         } else {
486                 /* ignore downcalls that we're not interested in */
487                 gossip_debug(GOSSIP_DEV_DEBUG,
488                              "WARNING: No one's waiting for tag %llu\n",
489                              llu(tag));
490         }
491         dev_req_release(buffer);
492
493         return total_returned_size;
494 }
495
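/*
 * ->write_iter entry point: unpack the iov_iter handed in by the VFS and
 * pass the underlying iovec array and segment count through to the
 * writev-style handler above.
 */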
496 static ssize_t pvfs2_devreq_write_iter(struct kiocb *iocb,
497                                       struct iov_iter *iter)
498 {
499         return pvfs2_devreq_writev(iocb->ki_filp,
500                                    iter->iov,
501                                    iter->nr_segs,
502                                    &iocb->ki_pos);
503 }
504
505 /* Mark all mounted file systems as needing a remount; returns 1 if none were mounted */
506 static int mark_all_pending_mounts(void)
507 {
508         int unmounted = 1;
509         struct pvfs2_sb_info_s *pvfs2_sb = NULL;
510
511         spin_lock(&pvfs2_superblocks_lock);
512         list_for_each_entry(pvfs2_sb, &pvfs2_superblocks, list) {
513                 /* All of these file systems require a remount */
514                 pvfs2_sb->mount_pending = 1;
515                 unmounted = 0;
516         }
517         spin_unlock(&pvfs2_superblocks_lock);
518         return unmounted;
519 }
520
521 /*
522  * Determine if a given file system needs to be remounted or not
523  *  Returns -1 on error
524  *           0 if already mounted
525  *           1 if needs remount
526  */
527 int fs_mount_pending(__s32 fsid)
528 {
529         int mount_pending = -1;
530         struct pvfs2_sb_info_s *pvfs2_sb = NULL;
531
532         spin_lock(&pvfs2_superblocks_lock);
533         list_for_each_entry(pvfs2_sb, &pvfs2_superblocks, list) {
534                 if (pvfs2_sb->fs_id == fsid) {
535                         mount_pending = pvfs2_sb->mount_pending;
536                         break;
537                 }
538         }
539         spin_unlock(&pvfs2_superblocks_lock);
540         return mount_pending;
541 }
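/*
 * Note the interplay with pvfs2_devreq_read() above: while a file system's
 * remount is pending, ops addressed to that fsid are skipped rather than
 * handed to client-core; the PVFS_DEV_REMOUNT_ALL ioctl below then remounts
 * each superblock so normal servicing can resume.
 */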
542
543 /*
544  * NOTE: gets called when the last reference to this device is dropped.
545  * Using the open_access_count variable, we enforce a reference count
546  * on this file so that it can be opened by only one process at a time.
547  * The devreq_mutex is used to make sure all I/O has completed
548  * before we call pvfs_bufmap_finalize, and to handle similar such
549  * tricky situations.
550  */
551 static int pvfs2_devreq_release(struct inode *inode, struct file *file)
552 {
553         int unmounted = 0;
554
555         gossip_debug(GOSSIP_DEV_DEBUG,
556                      "%s:pvfs2-client-core: exiting, closing device\n",
557                      __func__);
558
559         mutex_lock(&devreq_mutex);
560         pvfs_bufmap_finalize();
561
562         open_access_count--;
563
564         unmounted = mark_all_pending_mounts();
565         gossip_debug(GOSSIP_DEV_DEBUG, "PVFS2 Device Close: Filesystem(s) %s\n",
566                      (unmounted ? "UNMOUNTED" : "MOUNTED"));
567         mutex_unlock(&devreq_mutex);
568
569         /*
570          * Walk through the list of ops in the request list, mark them
571          * as purged and wake them up.
572          */
573         purge_waiting_ops();
574         /*
575          * Walk through the hash table of in progress operations; mark
576          * them as purged and wake them up
577          */
578         purge_inprogress_ops();
579         gossip_debug(GOSSIP_DEV_DEBUG,
580                      "pvfs2-client-core: device close complete\n");
581         return 0;
582 }
583
584 int is_daemon_in_service(void)
585 {
586         int in_service;
587
588         /*
589          * Check whether client-core is alive based on the access count
590          * we maintain on the device.
591          */
592         mutex_lock(&devreq_mutex);
593         in_service = open_access_count == 1 ? 0 : -EIO;
594         mutex_unlock(&devreq_mutex);
595         return in_service;
596 }
597
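/*
 * Every ioctl command word encodes, among other things, a type/magic byte
 * and a command sequence number; _IOC_TYPE() and _IOC_NR() extract them so
 * we can reject commands that were not built against our PVFS_DEV_MAGIC
 * and command range.
 */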
598 static inline long check_ioctl_command(unsigned int command)
599 {
600         /* Check for valid ioctl codes */
601         if (_IOC_TYPE(command) != PVFS_DEV_MAGIC) {
602                 gossip_err("device ioctl magic numbers don't match! Did you rebuild pvfs2-client-core/libpvfs2? [cmd %x, magic %x != %x]\n",
603                         command,
604                         _IOC_TYPE(command),
605                         PVFS_DEV_MAGIC);
606                 return -EINVAL;
607         }
608         /* and valid ioctl commands */
609         if (_IOC_NR(command) >= PVFS_DEV_MAXNR || _IOC_NR(command) <= 0) {
610                 gossip_err("Invalid ioctl command number [%d >= %d]\n",
611                            _IOC_NR(command), PVFS_DEV_MAXNR);
612                 return -ENOIOCTLCMD;
613         }
614         return 0;
615 }
616
617 static long dispatch_ioctl_command(unsigned int command, unsigned long arg)
618 {
619         static __s32 magic = PVFS2_DEVREQ_MAGIC;
620         static __s32 max_up_size = MAX_ALIGNED_DEV_REQ_UPSIZE;
621         static __s32 max_down_size = MAX_ALIGNED_DEV_REQ_DOWNSIZE;
622         struct PVFS_dev_map_desc user_desc;
623         int ret = 0;
624         struct dev_mask_info_s mask_info = { 0 };
625         struct dev_mask2_info_s mask2_info = { 0, 0 };
626         int upstream_kmod = 1;
627         struct list_head *tmp = NULL;
628         struct pvfs2_sb_info_s *pvfs2_sb = NULL;
629
630         /* mtmoore: add locking here */
631
632         switch (command) {
633         case PVFS_DEV_GET_MAGIC:
634                 return ((put_user(magic, (__s32 __user *) arg) == -EFAULT) ?
635                         -EIO :
636                         0);
637         case PVFS_DEV_GET_MAX_UPSIZE:
638                 return ((put_user(max_up_size,
639                                   (__s32 __user *) arg) == -EFAULT) ?
640                                         -EIO :
641                                         0);
642         case PVFS_DEV_GET_MAX_DOWNSIZE:
643                 return ((put_user(max_down_size,
644                                   (__s32 __user *) arg) == -EFAULT) ?
645                                         -EIO :
646                                         0);
647         case PVFS_DEV_MAP:
648                 ret = copy_from_user(&user_desc,
649                                      (struct PVFS_dev_map_desc __user *)
650                                      arg,
651                                      sizeof(struct PVFS_dev_map_desc));
652                 return ret ? -EIO : pvfs_bufmap_initialize(&user_desc);
653         case PVFS_DEV_REMOUNT_ALL:
654                 gossip_debug(GOSSIP_DEV_DEBUG,
655                              "pvfs2_devreq_ioctl: got PVFS_DEV_REMOUNT_ALL\n");
656
657                 /*
658                  * remount all mounted pvfs2 volumes to regain the lost
659                  * dynamic mount tables (if any) -- NOTE: this is done
660                  * without keeping the superblock list locked due to the
661                  * upcall/downcall waiting.  also, the request semaphore is
662                  * used to ensure that no operations will be serviced until
663                  * all of the remounts are serviced (to avoid failing ops
664                  * issued between the remounts)
665                  */
666                 ret = mutex_lock_interruptible(&request_mutex);
667                 if (ret < 0)
668                         return ret;
669                 gossip_debug(GOSSIP_DEV_DEBUG,
670                              "pvfs2_devreq_ioctl: priority remount in progress\n");
671                 list_for_each(tmp, &pvfs2_superblocks) {
672                         pvfs2_sb =
673                                 list_entry(tmp, struct pvfs2_sb_info_s, list);
674                         if (pvfs2_sb && (pvfs2_sb->sb)) {
675                                 gossip_debug(GOSSIP_DEV_DEBUG,
676                                              "Remounting SB %p\n",
677                                              pvfs2_sb);
678
679                                 ret = pvfs2_remount(pvfs2_sb->sb);
680                                 if (ret) {
681                                         gossip_debug(GOSSIP_DEV_DEBUG,
682                                                      "SB %p remount failed\n",
683                                                      pvfs2_sb);
684                                                 break;
685                                 }
686                         }
687                 }
688                 gossip_debug(GOSSIP_DEV_DEBUG,
689                              "pvfs2_devreq_ioctl: priority remount complete\n");
690                 mutex_unlock(&request_mutex);
691                 return ret;
692
693         case PVFS_DEV_UPSTREAM:
694                 ret = copy_to_user((void __user *)arg,
695                                     &upstream_kmod,
696                                     sizeof(upstream_kmod));
697
698                 if (ret != 0)
699                         return -EIO;
700                 else
701                         return ret;
702
703         case PVFS_DEV_CLIENT_MASK:
704                 ret = copy_from_user(&mask2_info,
705                                      (void __user *)arg,
706                                      sizeof(struct dev_mask2_info_s));
707
708                 if (ret != 0)
709                         return -EIO;
710
711                 client_debug_mask.mask1 = mask2_info.mask1_value;
712                 client_debug_mask.mask2 = mask2_info.mask2_value;
713
714                 pr_info("%s: client debug mask has been received "
715                         ":%llx: :%llx:\n",
716                         __func__,
717                         (unsigned long long)client_debug_mask.mask1,
718                         (unsigned long long)client_debug_mask.mask2);
719
720                 return ret;
721
722         case PVFS_DEV_CLIENT_STRING:
723                 ret = copy_from_user(&client_debug_array_string,
724                                      (void __user *)arg,
725                                      PVFS2_MAX_DEBUG_STRING_LEN);
726                 if (ret != 0) {
727                         pr_info("%s: "
728                                 "PVFS_DEV_CLIENT_STRING: copy_from_user failed"
729                                 "\n",
730                                 __func__);
731                         return -EIO;
732                 }
733
734                 pr_info("%s: client debug array string has been received."
735                         "\n",
736                         __func__);
737
738                 if (!help_string_initialized) {
739
740                         /* Free the "we don't know yet" default string... */
741                         kfree(debug_help_string);
742
743                         /* build a proper debug help string */
744                         if (orangefs_prepare_debugfs_help_string(0)) {
745                                 gossip_err("%s: "
746                                            "prepare_debugfs_help_string failed"
747                                            "\n",
748                                            __func__);
749                                 return -EIO;
750                         }
751
752                         /* Replace the boilerplate boot-time debug-help file. */
753                         debugfs_remove(help_file_dentry);
754
755                         help_file_dentry =
756                                 debugfs_create_file(
757                                         ORANGEFS_KMOD_DEBUG_HELP_FILE,
758                                         0444,
759                                         debug_dir,
760                                         debug_help_string,
761                                         &debug_help_fops);
762
763                         if (!help_file_dentry) {
764                                 gossip_err("%s: debugfs_create_file failed for"
765                                            " :%s:!\n",
766                                            __func__,
767                                            ORANGEFS_KMOD_DEBUG_HELP_FILE);
768                                 return -EIO;
769                         }
770                 }
771
772                 debug_mask_to_string(&client_debug_mask, 1);
773
774                 debugfs_remove(client_debug_dentry);
775
776                 pvfs2_client_debug_init();
777
778                 help_string_initialized++;
779
780                 return ret;
781
782         case PVFS_DEV_DEBUG:
783                 ret = copy_from_user(&mask_info,
784                                      (void __user *)arg,
785                                      sizeof(mask_info));
786
787                 if (ret != 0)
788                         return -EIO;
789
790                 if (mask_info.mask_type == KERNEL_MASK) {
791                         if ((mask_info.mask_value == 0)
792                             && (kernel_mask_set_mod_init)) {
793                                 /*
794                                  * the kernel debug mask was set when the
795                                  * kernel module was loaded; don't override
796                                  * it if the client-core was started without
797                                  * a value for PVFS2_KMODMASK.
798                                  */
799                                 return 0;
800                         }
801                         debug_mask_to_string(&mask_info.mask_value,
802                                              mask_info.mask_type);
803                         gossip_debug_mask = mask_info.mask_value;
804                         pr_info("PVFS: kernel debug mask has been modified to "
805                                 ":%s: :%llx:\n",
806                                 kernel_debug_string,
807                                 (unsigned long long)gossip_debug_mask);
808                 } else if (mask_info.mask_type == CLIENT_MASK) {
809                         debug_mask_to_string(&mask_info.mask_value,
810                                              mask_info.mask_type);
811                         pr_info("PVFS: client debug mask has been modified to"
812                                 ":%s: :%llx:\n",
813                                 client_debug_string,
814                                 llu(mask_info.mask_value));
815                 } else {
816                         gossip_lerr("Invalid mask type....\n");
817                         return -EINVAL;
818                 }
819
820                 return ret;
821
822         default:
823                 return -ENOIOCTLCMD;
824         }
825         return -ENOIOCTLCMD;
826 }
827
828 static long pvfs2_devreq_ioctl(struct file *file,
829                                unsigned int command, unsigned long arg)
830 {
831         long ret;
832
833         /* Check for properly constructed commands */
834         ret = check_ioctl_command(command);
835         if (ret < 0)
836                 return (int)ret;
837
838         return (int)dispatch_ioctl_command(command, arg);
839 }
840
841 #ifdef CONFIG_COMPAT            /* CONFIG_COMPAT is in .config */
842
843 /*  Compat structure for the PVFS_DEV_MAP ioctl */
844 struct PVFS_dev_map_desc32 {
845         compat_uptr_t ptr;
846         __s32 total_size;
847         __s32 size;
848         __s32 count;
849 };
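/*
 * A 32-bit caller's PVFS_dev_map_desc carries a 32-bit user pointer, so it
 * cannot be copied into the 64-bit struct directly: compat_uptr_t holds the
 * narrow pointer and compat_ptr() in translate_dev_map26() widens it before
 * the descriptor is re-packed for dispatch_ioctl_command().
 */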
850
851 static unsigned long translate_dev_map26(unsigned long args, long *error)
852 {
853         struct PVFS_dev_map_desc32 __user *p32 = (void __user *)args;
854         /*
855          * Depending on the architecture, allocate some space on the
856          * user-call-stack based on our expected layout.
857          */
858         struct PVFS_dev_map_desc __user *p =
859             compat_alloc_user_space(sizeof(*p));
860         u32 addr;
861
862         *error = 0;
863         /* get the ptr from the 32 bit user-space */
864         if (get_user(addr, &p32->ptr))
865                 goto err;
866         /* try to put that into a 64-bit layout */
867         if (put_user(compat_ptr(addr), &p->ptr))
868                 goto err;
869         /* copy the remaining fields */
870         if (copy_in_user(&p->total_size, &p32->total_size, sizeof(__s32)))
871                 goto err;
872         if (copy_in_user(&p->size, &p32->size, sizeof(__s32)))
873                 goto err;
874         if (copy_in_user(&p->count, &p32->count, sizeof(__s32)))
875                 goto err;
876         return (unsigned long)p;
877 err:
878         *error = -EFAULT;
879         return 0;
880 }
881
882 /*
883  * ioctl handler for 32-bit user-space apps when the kernel module
884  * is compiled as 64-bit
885  */
886 static long pvfs2_devreq_compat_ioctl(struct file *filp, unsigned int cmd,
887                                       unsigned long args)
888 {
889         long ret;
890         unsigned long arg = args;
891
892         /* Check for properly constructed commands */
893         ret = check_ioctl_command(cmd);
894         if (ret < 0)
895                 return ret;
896         if (cmd == PVFS_DEV_MAP) {
897                 /*
898                  * convert the arguments to what we expect internally
899                  * in kernel space
900                  */
901                 arg = translate_dev_map26(args, &ret);
902                 if (ret < 0) {
903                         gossip_err("Could not translate dev map\n");
904                         return ret;
905                 }
906         }
907         /* no other ioctl requires translation */
908         return dispatch_ioctl_command(cmd, arg);
909 }
910
911 static int pvfs2_ioctl32_init(void)
912 {
913         return 0;
914 }
915
916 static void pvfs2_ioctl32_cleanup(void)
917 {
918         return;
919 }
920
921 #endif /* CONFIG_COMPAT is in .config */
922
923 /* the assigned character device major number */
924 static int pvfs2_dev_major;
925
926 /*
927  * Initialize pvfs2 device specific state:
928  * Must be called at module load time only
929  */
930 int pvfs2_dev_init(void)
931 {
932         int ret;
933
934         /* register the ioctl32 sub-system */
935         ret = pvfs2_ioctl32_init();
936         if (ret < 0)
937                 return ret;
938
939         /* register pvfs2-req device  */
940         pvfs2_dev_major = register_chrdev(0,
941                                           PVFS2_REQDEVICE_NAME,
942                                           &pvfs2_devreq_file_operations);
943         if (pvfs2_dev_major < 0) {
944                 gossip_debug(GOSSIP_DEV_DEBUG,
945                              "Failed to register /dev/%s (error %d)\n",
946                              PVFS2_REQDEVICE_NAME, pvfs2_dev_major);
947                 pvfs2_ioctl32_cleanup();
948                 return pvfs2_dev_major;
949         }
950
951         gossip_debug(GOSSIP_DEV_DEBUG,
952                      "*** /dev/%s character device registered ***\n",
953                      PVFS2_REQDEVICE_NAME);
954         gossip_debug(GOSSIP_DEV_DEBUG, "'mknod /dev/%s c %d 0'.\n",
955                      PVFS2_REQDEVICE_NAME, pvfs2_dev_major);
956         return 0;
957 }
958
959 void pvfs2_dev_cleanup(void)
960 {
961         unregister_chrdev(pvfs2_dev_major, PVFS2_REQDEVICE_NAME);
962         gossip_debug(GOSSIP_DEV_DEBUG,
963                      "*** /dev/%s character device unregistered ***\n",
964                      PVFS2_REQDEVICE_NAME);
965         /* unregister the ioctl32 sub-system */
966         pvfs2_ioctl32_cleanup();
967 }
968
969 static unsigned int pvfs2_devreq_poll(struct file *file,
970                                       struct poll_table_struct *poll_table)
971 {
972         int poll_revent_mask = 0;
973
974         if (open_access_count == 1) {
975                 poll_wait(file, &pvfs2_request_list_waitq, poll_table);
976
977                 spin_lock(&pvfs2_request_list_lock);
978                 if (!list_empty(&pvfs2_request_list))
979                         poll_revent_mask |= POLLIN;
980                 spin_unlock(&pvfs2_request_list_lock);
981         }
982         return poll_revent_mask;
983 }
984
985 const struct file_operations pvfs2_devreq_file_operations = {
986         .owner = THIS_MODULE,
987         .read = pvfs2_devreq_read,
988         .write_iter = pvfs2_devreq_write_iter,
989         .open = pvfs2_devreq_open,
990         .release = pvfs2_devreq_release,
991         .unlocked_ioctl = pvfs2_devreq_ioctl,
992
993 #ifdef CONFIG_COMPAT            /* CONFIG_COMPAT is in .config */
994         .compat_ioctl = pvfs2_devreq_compat_ioctl,
995 #endif
996         .poll = pvfs2_devreq_poll
997 };