]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - fs/orangefs/waitqueue.c
Merge remote-tracking branch 'percpu/for-next'
[karo-tx-linux.git] / fs / orangefs / waitqueue.c
1 /*
2  * (C) 2001 Clemson University and The University of Chicago
3  * (C) 2011 Omnibond Systems
4  *
5  * Changes by Acxiom Corporation to implement generic service_operation()
6  * function, Copyright Acxiom Corporation, 2005.
7  *
8  * See COPYING in top-level directory.
9  */
10
11 /*
12  *  In-kernel waitqueue operations.
13  */
14
15 #include "protocol.h"
16 #include "pvfs2-kernel.h"
17 #include "pvfs2-bufmap.h"
18
19 /*
20  * What we do in this function is to walk the list of operations that are
21  * present in the request queue and mark them as purged.
22  * NOTE: This is called from the device close after client-core has
23  * guaranteed that no new operations could appear on the list since the
24  * client-core is anyway going to exit.
25  */
26 void purge_waiting_ops(void)
27 {
28         struct pvfs2_kernel_op_s *op;
29
30         spin_lock(&pvfs2_request_list_lock);
31         list_for_each_entry(op, &pvfs2_request_list, list) {
32                 gossip_debug(GOSSIP_WAIT_DEBUG,
33                              "pvfs2-client-core: purging op tag %llu %s\n",
34                              llu(op->tag),
35                              get_opname_string(op));
36                 spin_lock(&op->lock);
37                 set_op_state_purged(op);
38                 spin_unlock(&op->lock);
39                 wake_up_interruptible(&op->waitq);
40         }
41         spin_unlock(&pvfs2_request_list_lock);
42 }
43
44 /*
45  * submits a PVFS2 operation and waits for it to complete
46  *
47  * Note op->downcall.status will contain the status of the operation (in
48  * errno format), whether provided by pvfs2-client or a result of failure to
49  * service the operation.  If the caller wishes to distinguish, then
50  * op->state can be checked to see if it was serviced or not.
51  *
52  * Returns contents of op->downcall.status for convenience
53  */
54 int service_operation(struct pvfs2_kernel_op_s *op,
55                       const char *op_name,
56                       int flags)
57 {
58         /* flags to modify behavior */
59         sigset_t orig_sigset;
60         int ret = 0;
61
62         /* irqflags and wait_entry are only used IF the client-core aborts */
63         unsigned long irqflags;
64
65         DECLARE_WAITQUEUE(wait_entry, current);
66
67         op->upcall.tgid = current->tgid;
68         op->upcall.pid = current->pid;
69
70 retry_servicing:
71         op->downcall.status = 0;
72         gossip_debug(GOSSIP_WAIT_DEBUG,
73                      "pvfs2: service_operation: %s %p\n",
74                      op_name,
75                      op);
76         gossip_debug(GOSSIP_WAIT_DEBUG,
77                      "pvfs2: operation posted by process: %s, pid: %i\n",
78                      current->comm,
79                      current->pid);
80
81         /* mask out signals if this operation is not to be interrupted */
82         if (!(flags & PVFS2_OP_INTERRUPTIBLE))
83                 block_signals(&orig_sigset);
84
85         if (!(flags & PVFS2_OP_NO_SEMAPHORE)) {
86                 ret = mutex_lock_interruptible(&request_mutex);
87                 /*
88                  * check to see if we were interrupted while waiting for
89                  * semaphore
90                  */
91                 if (ret < 0) {
92                         if (!(flags & PVFS2_OP_INTERRUPTIBLE))
93                                 set_signals(&orig_sigset);
94                         op->downcall.status = ret;
95                         gossip_debug(GOSSIP_WAIT_DEBUG,
96                                      "pvfs2: service_operation interrupted.\n");
97                         return ret;
98                 }
99         }
100
101         gossip_debug(GOSSIP_WAIT_DEBUG,
102                      "%s:About to call is_daemon_in_service().\n",
103                      __func__);
104
105         if (is_daemon_in_service() < 0) {
106                 /*
107                  * By incrementing the per-operation attempt counter, we
108                  * directly go into the timeout logic while waiting for
109                  * the matching downcall to be read
110                  */
111                 gossip_debug(GOSSIP_WAIT_DEBUG,
112                              "%s:client core is NOT in service(%d).\n",
113                              __func__,
114                              is_daemon_in_service());
115                 op->attempts++;
116         }
117
118         /* queue up the operation */
119         if (flags & PVFS2_OP_PRIORITY) {
120                 add_priority_op_to_request_list(op);
121         } else {
122                 gossip_debug(GOSSIP_WAIT_DEBUG,
123                              "%s:About to call add_op_to_request_list().\n",
124                              __func__);
125                 add_op_to_request_list(op);
126         }
127
128         if (!(flags & PVFS2_OP_NO_SEMAPHORE))
129                 mutex_unlock(&request_mutex);
130
131         /*
132          * If we are asked to service an asynchronous operation from
133          * VFS perspective, we are done.
134          */
135         if (flags & PVFS2_OP_ASYNC)
136                 return 0;
137
138         if (flags & PVFS2_OP_CANCELLATION) {
139                 gossip_debug(GOSSIP_WAIT_DEBUG,
140                              "%s:"
141                              "About to call wait_for_cancellation_downcall.\n",
142                              __func__);
143                 ret = wait_for_cancellation_downcall(op);
144         } else {
145                 ret = wait_for_matching_downcall(op);
146         }
147
148         if (ret < 0) {
149                 /* failed to get matching downcall */
150                 if (ret == -ETIMEDOUT) {
151                         gossip_err("pvfs2: %s -- wait timed out; aborting attempt.\n",
152                                    op_name);
153                 }
154                 op->downcall.status = ret;
155         } else {
156                 /* got matching downcall; make sure status is in errno format */
157                 op->downcall.status =
158                     pvfs2_normalize_to_errno(op->downcall.status);
159                 ret = op->downcall.status;
160         }
161
162         if (!(flags & PVFS2_OP_INTERRUPTIBLE))
163                 set_signals(&orig_sigset);
164
165         BUG_ON(ret != op->downcall.status);
166         /* retry if operation has not been serviced and if requested */
167         if (!op_state_serviced(op) && op->downcall.status == -EAGAIN) {
168                 gossip_debug(GOSSIP_WAIT_DEBUG,
169                              "pvfs2: tag %llu (%s)"
170                              " -- operation to be retried (%d attempt)\n",
171                              llu(op->tag),
172                              op_name,
173                              op->attempts + 1);
174
175                 if (!op->uses_shared_memory)
176                         /*
177                          * this operation doesn't use the shared memory
178                          * system
179                          */
180                         goto retry_servicing;
181
182                 /* op uses shared memory */
183                 if (get_bufmap_init() == 0) {
184                         /*
185                          * This operation uses the shared memory system AND
186                          * the system is not yet ready. This situation occurs
187                          * when the client-core is restarted AND there were
188                          * operations waiting to be processed or were already
189                          * in process.
190                          */
191                         gossip_debug(GOSSIP_WAIT_DEBUG,
192                                      "uses_shared_memory is true.\n");
193                         gossip_debug(GOSSIP_WAIT_DEBUG,
194                                      "Client core in-service status(%d).\n",
195                                      is_daemon_in_service());
196                         gossip_debug(GOSSIP_WAIT_DEBUG, "bufmap_init:%d.\n",
197                                      get_bufmap_init());
198                         gossip_debug(GOSSIP_WAIT_DEBUG,
199                                      "operation's status is 0x%0x.\n",
200                                      op->op_state);
201
202                         /*
203                          * let process sleep for a few seconds so shared
204                          * memory system can be initialized.
205                          */
206                         spin_lock_irqsave(&op->lock, irqflags);
207                         add_wait_queue(&pvfs2_bufmap_init_waitq, &wait_entry);
208                         spin_unlock_irqrestore(&op->lock, irqflags);
209
210                         set_current_state(TASK_INTERRUPTIBLE);
211
212                         /*
213                          * Wait for pvfs_bufmap_initialize() to wake me up
214                          * within the allotted time.
215                          */
216                         ret = schedule_timeout(MSECS_TO_JIFFIES
217                                 (1000 * PVFS2_BUFMAP_WAIT_TIMEOUT_SECS));
218
219                         gossip_debug(GOSSIP_WAIT_DEBUG,
220                                      "Value returned from schedule_timeout:"
221                                      "%d.\n",
222                                      ret);
223                         gossip_debug(GOSSIP_WAIT_DEBUG,
224                                      "Is shared memory available? (%d).\n",
225                                      get_bufmap_init());
226
227                         spin_lock_irqsave(&op->lock, irqflags);
228                         remove_wait_queue(&pvfs2_bufmap_init_waitq,
229                                           &wait_entry);
230                         spin_unlock_irqrestore(&op->lock, irqflags);
231
232                         if (get_bufmap_init() == 0) {
233                                 gossip_err("%s:The shared memory system has not started in %d seconds after the client core restarted.  Aborting user's request(%s).\n",
234                                            __func__,
235                                            PVFS2_BUFMAP_WAIT_TIMEOUT_SECS,
236                                            get_opname_string(op));
237                                 return -EIO;
238                         }
239
240                         /*
241                          * Return to the calling function and re-populate a
242                          * shared memory buffer.
243                          */
244                         return -EAGAIN;
245                 }
246         }
247
248         gossip_debug(GOSSIP_WAIT_DEBUG,
249                      "pvfs2: service_operation %s returning: %d for %p.\n",
250                      op_name,
251                      ret,
252                      op);
253         return ret;
254 }
255
256 void pvfs2_clean_up_interrupted_operation(struct pvfs2_kernel_op_s *op)
257 {
258         /*
259          * handle interrupted cases depending on what state we were in when
260          * the interruption is detected.  there is a coarse grained lock
261          * across the operation.
262          *
263          * NOTE: be sure not to reverse lock ordering by locking an op lock
264          * while holding the request_list lock.  Here, we first lock the op
265          * and then lock the appropriate list.
266          */
267         if (!op) {
268                 gossip_debug(GOSSIP_WAIT_DEBUG,
269                             "%s: op is null, ignoring\n",
270                              __func__);
271                 return;
272         }
273
274         /*
275          * one more sanity check, make sure it's in one of the possible states
276          * or don't try to cancel it
277          */
278         if (!(op_state_waiting(op) ||
279               op_state_in_progress(op) ||
280               op_state_serviced(op) ||
281               op_state_purged(op))) {
282                 gossip_debug(GOSSIP_WAIT_DEBUG,
283                              "%s: op %p not in a valid state (%0x), "
284                              "ignoring\n",
285                              __func__,
286                              op,
287                              op->op_state);
288                 return;
289         }
290
291         spin_lock(&op->lock);
292
293         if (op_state_waiting(op)) {
294                 /*
295                  * upcall hasn't been read; remove op from upcall request
296                  * list.
297                  */
298                 spin_unlock(&op->lock);
299                 remove_op_from_request_list(op);
300                 gossip_debug(GOSSIP_WAIT_DEBUG,
301                              "Interrupted: Removed op %p from request_list\n",
302                              op);
303         } else if (op_state_in_progress(op)) {
304                 /* op must be removed from the in progress htable */
305                 spin_unlock(&op->lock);
306                 spin_lock(&htable_ops_in_progress_lock);
307                 list_del(&op->list);
308                 spin_unlock(&htable_ops_in_progress_lock);
309                 gossip_debug(GOSSIP_WAIT_DEBUG,
310                              "Interrupted: Removed op %p"
311                              " from htable_ops_in_progress\n",
312                              op);
313         } else if (!op_state_serviced(op)) {
314                 spin_unlock(&op->lock);
315                 gossip_err("interrupted operation is in a weird state 0x%x\n",
316                            op->op_state);
317         } else {
318                 /*
319                  * It is not intended for execution to flow here,
320                  * but having this unlock here makes sparse happy.
321                  */
322                 gossip_err("%s: can't get here.\n", __func__);
323                 spin_unlock(&op->lock);
324         }
325 }
326
327 /*
328  * sleeps on waitqueue waiting for matching downcall.
329  * if client-core finishes servicing, then we are good to go.
330  * else if client-core exits, we get woken up here, and retry with a timeout
331  *
332  * Post when this call returns to the caller, the specified op will no
333  * longer be on any list or htable.
334  *
335  * Returns 0 on success and -errno on failure
336  * Errors are:
337  * EAGAIN in case we want the caller to requeue and try again..
338  * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this
339  * operation since client-core seems to be exiting too often
340  * or if we were interrupted.
341  */
342 int wait_for_matching_downcall(struct pvfs2_kernel_op_s *op)
343 {
344         int ret = -EINVAL;
345         DECLARE_WAITQUEUE(wait_entry, current);
346
347         spin_lock(&op->lock);
348         add_wait_queue(&op->waitq, &wait_entry);
349         spin_unlock(&op->lock);
350
351         while (1) {
352                 set_current_state(TASK_INTERRUPTIBLE);
353
354                 spin_lock(&op->lock);
355                 if (op_state_serviced(op)) {
356                         spin_unlock(&op->lock);
357                         ret = 0;
358                         break;
359                 }
360                 spin_unlock(&op->lock);
361
362                 if (!signal_pending(current)) {
363                         /*
364                          * if this was our first attempt and client-core
365                          * has not purged our operation, we are happy to
366                          * simply wait
367                          */
368                         spin_lock(&op->lock);
369                         if (op->attempts == 0 && !op_state_purged(op)) {
370                                 spin_unlock(&op->lock);
371                                 schedule();
372                         } else {
373                                 spin_unlock(&op->lock);
374                                 /*
375                                  * subsequent attempts, we retry exactly once
376                                  * with timeouts
377                                  */
378                                 if (!schedule_timeout(MSECS_TO_JIFFIES
379                                       (1000 * op_timeout_secs))) {
380                                         gossip_debug(GOSSIP_WAIT_DEBUG,
381                                                      "*** %s:"
382                                                      " operation timed out (tag"
383                                                      " %llu, %p, att %d)\n",
384                                                      __func__,
385                                                      llu(op->tag),
386                                                      op,
387                                                      op->attempts);
388                                         ret = -ETIMEDOUT;
389                                         pvfs2_clean_up_interrupted_operation
390                                             (op);
391                                         break;
392                                 }
393                         }
394                         spin_lock(&op->lock);
395                         op->attempts++;
396                         /*
397                          * if the operation was purged in the meantime, it
398                          * is better to requeue it afresh but ensure that
399                          * we have not been purged repeatedly. This could
400                          * happen if client-core crashes when an op
401                          * is being serviced, so we requeue the op, client
402                          * core crashes again so we requeue the op, client
403                          * core starts, and so on...
404                          */
405                         if (op_state_purged(op)) {
406                                 ret = (op->attempts < PVFS2_PURGE_RETRY_COUNT) ?
407                                          -EAGAIN :
408                                          -EIO;
409                                 spin_unlock(&op->lock);
410                                 gossip_debug(GOSSIP_WAIT_DEBUG,
411                                              "*** %s:"
412                                              " operation purged (tag "
413                                              "%llu, %p, att %d)\n",
414                                              __func__,
415                                              llu(op->tag),
416                                              op,
417                                              op->attempts);
418                                 pvfs2_clean_up_interrupted_operation(op);
419                                 break;
420                         }
421                         spin_unlock(&op->lock);
422                         continue;
423                 }
424
425                 gossip_debug(GOSSIP_WAIT_DEBUG,
426                              "*** %s:"
427                              " operation interrupted by a signal (tag "
428                              "%llu, op %p)\n",
429                              __func__,
430                              llu(op->tag),
431                              op);
432                 pvfs2_clean_up_interrupted_operation(op);
433                 ret = -EINTR;
434                 break;
435         }
436
437         set_current_state(TASK_RUNNING);
438
439         spin_lock(&op->lock);
440         remove_wait_queue(&op->waitq, &wait_entry);
441         spin_unlock(&op->lock);
442
443         return ret;
444 }
445
446 /*
447  * similar to wait_for_matching_downcall(), but used in the special case
448  * of I/O cancellations.
449  *
450  * Note we need a special wait function because if this is called we already
451  *      know that a signal is pending in current and need to service the
452  *      cancellation upcall anyway.  the only way to exit this is to either
453  *      timeout or have the cancellation be serviced properly.
454  */
455 int wait_for_cancellation_downcall(struct pvfs2_kernel_op_s *op)
456 {
457         int ret = -EINVAL;
458         DECLARE_WAITQUEUE(wait_entry, current);
459
460         spin_lock(&op->lock);
461         add_wait_queue(&op->waitq, &wait_entry);
462         spin_unlock(&op->lock);
463
464         while (1) {
465                 set_current_state(TASK_INTERRUPTIBLE);
466
467                 spin_lock(&op->lock);
468                 if (op_state_serviced(op)) {
469                         gossip_debug(GOSSIP_WAIT_DEBUG,
470                                      "%s:op-state is SERVICED.\n",
471                                      __func__);
472                         spin_unlock(&op->lock);
473                         ret = 0;
474                         break;
475                 }
476                 spin_unlock(&op->lock);
477
478                 if (signal_pending(current)) {
479                         gossip_debug(GOSSIP_WAIT_DEBUG,
480                                      "%s:operation interrupted by a signal (tag"
481                                      " %llu, op %p)\n",
482                                      __func__,
483                                      llu(op->tag),
484                                      op);
485                         pvfs2_clean_up_interrupted_operation(op);
486                         ret = -EINTR;
487                         break;
488                 }
489
490                 gossip_debug(GOSSIP_WAIT_DEBUG,
491                              "%s:About to call schedule_timeout.\n",
492                              __func__);
493                 ret =
494                     schedule_timeout(MSECS_TO_JIFFIES(1000 * op_timeout_secs));
495
496                 gossip_debug(GOSSIP_WAIT_DEBUG,
497                              "%s:Value returned from schedule_timeout(%d).\n",
498                              __func__,
499                              ret);
500                 if (!ret) {
501                         gossip_debug(GOSSIP_WAIT_DEBUG,
502                                      "%s:*** operation timed out: %p\n",
503                                      __func__,
504                                      op);
505                         pvfs2_clean_up_interrupted_operation(op);
506                         ret = -ETIMEDOUT;
507                         break;
508                 }
509
510                 gossip_debug(GOSSIP_WAIT_DEBUG,
511                              "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n",
512                              __func__);
513                 ret = -ETIMEDOUT;
514                 break;
515         }
516
517         set_current_state(TASK_RUNNING);
518
519         spin_lock(&op->lock);
520         remove_wait_queue(&op->waitq, &wait_entry);
521         spin_unlock(&op->lock);
522
523         gossip_debug(GOSSIP_WAIT_DEBUG,
524                      "%s:returning ret(%d)\n",
525                      __func__,
526                      ret);
527
528         return ret;
529 }