ACPI / EC: Work around method reentrancy limit in ACPICA for _Qxx
[cascardo/linux.git] / drivers / staging / lustre / lustre / ldlm / ldlm_flock.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003 Hewlett-Packard Development Company LP.
28  * Developed under the sponsorship of the US Government under
29  * Subcontract No. B514193
30  *
31  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
32  * Use is subject to license terms.
33  *
34  * Copyright (c) 2010, 2012, Intel Corporation.
35  */
36 /*
37  * This file is part of Lustre, http://www.lustre.org/
38  * Lustre is a trademark of Sun Microsystems, Inc.
39  */
40
41 /**
42  * This file implements POSIX lock type for Lustre.
43  * Its policy properties are start and end of extent and PID.
44  *
45  * These locks are only done through MDS due to POSIX semantics requiring
46  * e.g. that locks could be only partially released and as such split into
47  * two parts, and also that two adjacent locks from the same process may be
48  * merged into a single wider lock.
49  *
50  * Lock modes are mapped like this:
51  * PR and PW for READ and WRITE locks
52  * NL to request a releasing of a portion of the lock
53  *
54  * These flock locks never timeout.
55  */
56
57 #define DEBUG_SUBSYSTEM S_LDLM
58
59 #include "../include/lustre_dlm.h"
60 #include "../include/obd_support.h"
61 #include "../include/obd_class.h"
62 #include "../include/lustre_lib.h"
63 #include <linux/list.h>
64 #include "ldlm_internal.h"
65
/**
 * list_for_remaining_safe - iterate over the remaining entries in a list
 *            and safeguard against removal of a list entry.
 * \param pos   the &struct list_head to use as a loop counter. pos MUST
 *            have been initialized prior to using it in this macro.
 * \param n     another &struct list_head to use as temporary storage
 * \param head  the head for your list.
 *
 * Unlike list_for_each_safe(), iteration starts at the current value of
 * \a pos rather than at head->next, so a caller can resume scanning from
 * the middle of a list.  All arguments are parenthesized in the expansion
 * for macro hygiene (they are evaluated more than once, so callers must
 * still pass side-effect-free expressions).
 */
#define list_for_remaining_safe(pos, n, head) \
	for (n = (pos)->next; (pos) != (head); (pos) = n, n = (pos)->next)
77 static inline int
78 ldlm_same_flock_owner(struct ldlm_lock *lock, struct ldlm_lock *new)
79 {
80         return((new->l_policy_data.l_flock.owner ==
81                 lock->l_policy_data.l_flock.owner) &&
82                (new->l_export == lock->l_export));
83 }
84
85 static inline int
86 ldlm_flocks_overlap(struct ldlm_lock *lock, struct ldlm_lock *new)
87 {
88         return((new->l_policy_data.l_flock.start <=
89                 lock->l_policy_data.l_flock.end) &&
90                (new->l_policy_data.l_flock.end >=
91                 lock->l_policy_data.l_flock.start));
92 }
93
/*
 * Remove \a lock from its resource list and destroy it.
 *
 * Caller must hold the resource lock (every call site here is under
 * lock_res_and_lock()), which is why only the *_nolock variants are used.
 *
 * \param lock   the flock lock to tear down
 * \param mode   the mode whose reference is dropped when this is a
 *               client-side WAIT_NOREPROC teardown
 * \param flags  the enqueue flags; compared for equality against
 *               LDLM_FL_WAIT_NOREPROC to detect the client reprocess path
 */
static inline void
ldlm_flock_destroy(struct ldlm_lock *lock, enum ldlm_mode mode, __u64 flags)
{
	LDLM_DEBUG(lock, "ldlm_flock_destroy(mode: %d, flags: 0x%llx)",
		   mode, flags);

	/* Safe to not lock here, since it should be empty anyway */
	LASSERT(hlist_unhashed(&lock->l_exp_flock_hash));

	list_del_init(&lock->l_res_link);
	if (flags == LDLM_FL_WAIT_NOREPROC && !ldlm_is_failed(lock)) {
		/* client side - set a flag to prevent sending a CANCEL */
		lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_CBPENDING;

		/* when reaching here, it is under lock_res_and_lock(). Thus,
		 * need call the nolock version of ldlm_lock_decref_internal
		 */
		ldlm_lock_decref_internal_nolock(lock, mode);
	}

	ldlm_lock_destroy_nolock(lock);
}
116
/**
 * Process a granting attempt for flock lock.
 * Must be called under ns lock held.
 *
 * This function looks for any conflicts for \a lock in the granted or
 * waiting queues. The lock is granted if no conflicts are found in
 * either queue.
 *
 * It is also responsible for splitting a lock if a portion of the lock
 * is released.
 *
 * If \a first_enq is 0 (ie, called from ldlm_reprocess_queue):
 *   - blocking ASTs have already been sent
 *
 * If \a first_enq is 1 (ie, called from ldlm_lock_enqueue):
 *   - blocking ASTs have not been sent yet, so list of conflicting locks
 *     would be collected and ASTs sent.
 *
 * \param req        the flock request being processed
 * \param flags [in,out] enqueue flags; may gain LDLM_FL_LOCK_CHANGED or
 *                   LDLM_FL_BLOCK_GRANTED
 * \param first_enq  1 on first enqueue, 0 when reprocessing the queue
 * \param err  [out] ELDLM_OK on success, -EAGAIN (NOWAIT conflict) or
 *                   -ENOLCK (allocation failure for a split)
 * \param work_list  unused on this (client-side) path
 *
 * \retval LDLM_ITER_CONTINUE or LDLM_ITER_STOP
 */
static int ldlm_process_flock_lock(struct ldlm_lock *req, __u64 *flags,
				   int first_enq, enum ldlm_error *err,
				   struct list_head *work_list)
{
	struct ldlm_resource *res = req->l_resource;
	struct ldlm_namespace *ns = ldlm_res_to_ns(res);
	struct list_head *tmp;
	struct list_head *ownlocks = NULL;
	struct ldlm_lock *lock = NULL;
	struct ldlm_lock *new = req;
	struct ldlm_lock *new2 = NULL;
	enum ldlm_mode mode = req->l_req_mode;
	/* an LCK_NL request is an unlock: nothing new is inserted */
	int added = (mode == LCK_NL);
	int overlaps = 0;
	int splitted = 0;
	const struct ldlm_callback_suite null_cbs = { NULL };

	CDEBUG(D_DLMTRACE,
	       "flags %#llx owner %llu pid %u mode %u start %llu end %llu\n",
	       *flags, new->l_policy_data.l_flock.owner,
	       new->l_policy_data.l_flock.pid, mode,
	       req->l_policy_data.l_flock.start,
	       req->l_policy_data.l_flock.end);

	*err = ELDLM_OK;

	/* No blocking ASTs are sent to the clients for
	 * Posix file & record locks
	 */
	req->l_blocking_ast = NULL;

	/* Re-entered after dropping/retaking the res lock to allocate new2
	 * for a split; all cached list state must be recomputed.
	 */
reprocess:
	if ((*flags == LDLM_FL_WAIT_NOREPROC) || (mode == LCK_NL)) {
		/* This loop determines where this processes locks start
		 * in the resource lr_granted list.
		 */
		list_for_each(tmp, &res->lr_granted) {
			lock = list_entry(tmp, struct ldlm_lock,
					      l_res_link);
			if (ldlm_same_flock_owner(lock, req)) {
				ownlocks = tmp;
				break;
			}
		}
	} else {
		int reprocess_failed = 0;

		lockmode_verify(mode);

		/* This loop determines if there are existing locks
		 * that conflict with the new lock request.
		 */
		list_for_each(tmp, &res->lr_granted) {
			lock = list_entry(tmp, struct ldlm_lock,
					      l_res_link);

			/* a process never conflicts with its own locks */
			if (ldlm_same_flock_owner(lock, req)) {
				if (!ownlocks)
					ownlocks = tmp;
				continue;
			}

			/* locks are compatible, overlap doesn't matter */
			if (lockmode_compat(lock->l_granted_mode, mode))
				continue;

			if (!ldlm_flocks_overlap(lock, req))
				continue;

			/* when reprocessing, keep scanning so all other
			 * non-conflicting waiters can still be granted
			 */
			if (!first_enq) {
				reprocess_failed = 1;
				continue;
			}

			if (*flags & LDLM_FL_BLOCK_NOWAIT) {
				ldlm_flock_destroy(req, mode, *flags);
				*err = -EAGAIN;
				return LDLM_ITER_STOP;
			}

			if (*flags & LDLM_FL_TEST_LOCK) {
				/* F_GETLK: report the conflicting lock's
				 * mode, pid and range back in \a req
				 */
				ldlm_flock_destroy(req, mode, *flags);
				req->l_req_mode = lock->l_granted_mode;
				req->l_policy_data.l_flock.pid =
					lock->l_policy_data.l_flock.pid;
				req->l_policy_data.l_flock.start =
					lock->l_policy_data.l_flock.start;
				req->l_policy_data.l_flock.end =
					lock->l_policy_data.l_flock.end;
				*flags |= LDLM_FL_LOCK_CHANGED;
				return LDLM_ITER_STOP;
			}

			/* conflict found: queue the request and block */
			ldlm_resource_add_lock(res, &res->lr_waiting, req);
			*flags |= LDLM_FL_BLOCK_GRANTED;
			return LDLM_ITER_STOP;
		}
		if (reprocess_failed)
			return LDLM_ITER_CONTINUE;
	}

	if (*flags & LDLM_FL_TEST_LOCK) {
		/* F_GETLK with no conflict: LCK_NL signals "unlocked" */
		ldlm_flock_destroy(req, mode, *flags);
		req->l_req_mode = LCK_NL;
		*flags |= LDLM_FL_LOCK_CHANGED;
		return LDLM_ITER_STOP;
	}

	/* Scan the locks owned by this process that overlap this request.
	 * We may have to merge or split existing locks.
	 */
	if (!ownlocks)
		ownlocks = &res->lr_granted;

	list_for_remaining_safe(ownlocks, tmp, &res->lr_granted) {
		lock = list_entry(ownlocks, struct ldlm_lock, l_res_link);

		/* this owner's locks are contiguous in lr_granted; once
		 * another owner is seen there is nothing left to merge
		 */
		if (!ldlm_same_flock_owner(lock, new))
			break;

		if (lock->l_granted_mode == mode) {
			/* If the modes are the same then we need to process
			 * locks that overlap OR adjoin the new lock. The extra
			 * logic condition is necessary to deal with arithmetic
			 * overflow and underflow.
			 */
			if ((new->l_policy_data.l_flock.start >
			     (lock->l_policy_data.l_flock.end + 1))
			    && (lock->l_policy_data.l_flock.end !=
				OBD_OBJECT_EOF))
				continue;

			if ((new->l_policy_data.l_flock.end <
			     (lock->l_policy_data.l_flock.start - 1))
			    && (lock->l_policy_data.l_flock.start != 0))
				break;

			/* merge: grow whichever lock survives to cover the
			 * union of the two ranges
			 */
			if (new->l_policy_data.l_flock.start <
			    lock->l_policy_data.l_flock.start) {
				lock->l_policy_data.l_flock.start =
					new->l_policy_data.l_flock.start;
			} else {
				new->l_policy_data.l_flock.start =
					lock->l_policy_data.l_flock.start;
			}

			if (new->l_policy_data.l_flock.end >
			    lock->l_policy_data.l_flock.end) {
				lock->l_policy_data.l_flock.end =
					new->l_policy_data.l_flock.end;
			} else {
				new->l_policy_data.l_flock.end =
					lock->l_policy_data.l_flock.end;
			}

			if (added) {
				/* the request was already absorbed into an
				 * earlier lock; this one is now redundant
				 */
				ldlm_flock_destroy(lock, mode, *flags);
			} else {
				/* keep the existing granted lock, widened to
				 * cover the request, instead of inserting req
				 */
				new = lock;
				added = 1;
			}
			continue;
		}

		/* different mode: handle strict overlaps only */
		if (new->l_policy_data.l_flock.start >
		    lock->l_policy_data.l_flock.end)
			continue;

		if (new->l_policy_data.l_flock.end <
		    lock->l_policy_data.l_flock.start)
			break;

		++overlaps;

		if (new->l_policy_data.l_flock.start <=
		    lock->l_policy_data.l_flock.start) {
			if (new->l_policy_data.l_flock.end <
			    lock->l_policy_data.l_flock.end) {
				/* new covers the head of lock: trim lock */
				lock->l_policy_data.l_flock.start =
					new->l_policy_data.l_flock.end + 1;
				break;
			}
			/* new fully covers lock: drop lock */
			ldlm_flock_destroy(lock, lock->l_req_mode, *flags);
			continue;
		}
		if (new->l_policy_data.l_flock.end >=
		    lock->l_policy_data.l_flock.end) {
			/* new covers the tail of lock: trim lock */
			lock->l_policy_data.l_flock.end =
				new->l_policy_data.l_flock.start - 1;
			continue;
		}

		/* split the existing lock into two locks */

		/* if this is an F_UNLCK operation then we could avoid
		 * allocating a new lock and use the req lock passed in
		 * with the request but this would complicate the reply
		 * processing since updates to req get reflected in the
		 * reply. The client side replays the lock request so
		 * it must see the original lock data in the reply.
		 */

		/* XXX - if ldlm_lock_new() can sleep we should
		 * release the lr_lock, allocate the new lock,
		 * and restart processing this lock.
		 */
		if (!new2) {
			/* must drop the res lock to allocate; list state may
			 * change meanwhile, so restart via "reprocess"
			 */
			unlock_res_and_lock(req);
			new2 = ldlm_lock_create(ns, &res->lr_name, LDLM_FLOCK,
						lock->l_granted_mode, &null_cbs,
						NULL, 0, LVB_T_NONE);
			lock_res_and_lock(req);
			if (!new2) {
				ldlm_flock_destroy(req, lock->l_granted_mode,
						   *flags);
				*err = -ENOLCK;
				return LDLM_ITER_STOP;
			}
			goto reprocess;
		}

		splitted = 1;

		/* new2 takes the head of the split range; the existing lock
		 * keeps the tail past the end of the new request
		 */
		new2->l_granted_mode = lock->l_granted_mode;
		new2->l_policy_data.l_flock.pid =
			new->l_policy_data.l_flock.pid;
		new2->l_policy_data.l_flock.owner =
			new->l_policy_data.l_flock.owner;
		new2->l_policy_data.l_flock.start =
			lock->l_policy_data.l_flock.start;
		new2->l_policy_data.l_flock.end =
			new->l_policy_data.l_flock.start - 1;
		lock->l_policy_data.l_flock.start =
			new->l_policy_data.l_flock.end + 1;
		new2->l_conn_export = lock->l_conn_export;
		if (lock->l_export) {
			new2->l_export = class_export_lock_get(lock->l_export,
							       new2);
			if (new2->l_export->exp_lock_hash &&
			    hlist_unhashed(&new2->l_exp_hash))
				cfs_hash_add(new2->l_export->exp_lock_hash,
					     &new2->l_remote_handle,
					     &new2->l_exp_hash);
		}
		if (*flags == LDLM_FL_WAIT_NOREPROC)
			ldlm_lock_addref_internal_nolock(new2,
							 lock->l_granted_mode);

		/* insert new2 at lock */
		ldlm_resource_add_lock(res, ownlocks, new2);
		LDLM_LOCK_RELEASE(new2);
		break;
	}

	/* if new2 is created but never used, destroy it*/
	if (splitted == 0 && new2)
		ldlm_lock_destroy_nolock(new2);

	/* At this point we're granting the lock request. */
	req->l_granted_mode = req->l_req_mode;

	if (!added) {
		list_del_init(&req->l_res_link);
		/* insert new lock before ownlocks in list. */
		ldlm_resource_add_lock(res, ownlocks, req);
	}

	if (*flags != LDLM_FL_WAIT_NOREPROC) {
		/* The only one possible case for client-side calls flock
		 * policy function is ldlm_flock_completion_ast inside which
		 * carries LDLM_FL_WAIT_NOREPROC flag.
		 */
		CERROR("Illegal parameter for client-side-only module.\n");
		LBUG();
	}

	/* In case we're reprocessing the requested lock we can't destroy
	 * it until after calling ldlm_add_ast_work_item() above so that laawi()
	 * can bump the reference count on \a req. Otherwise \a req
	 * could be freed before the completion AST can be sent.
	 */
	if (added)
		ldlm_flock_destroy(req, mode, *flags);

	ldlm_resource_dump(D_INFO, res);
	return LDLM_ITER_CONTINUE;
}
422
/* Context handed to ldlm_flock_interrupted_wait() while a process sleeps
 * in ldlm_flock_completion_ast() waiting for a blocked lock to be granted.
 */
struct ldlm_flock_wait_data {
	struct ldlm_lock *fwd_lock;	/* the lock being waited on */
	int	       fwd_generation;	/* import generation sampled before
					 * sleeping */
};
427
428 static void
429 ldlm_flock_interrupted_wait(void *data)
430 {
431         struct ldlm_lock *lock;
432
433         lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock;
434
435         lock_res_and_lock(lock);
436
437         /* client side - set flag to prevent lock from being put on LRU list */
438         ldlm_set_cbpending(lock);
439         unlock_res_and_lock(lock);
440 }
441
/**
 * Flock completion callback function.
 *
 * \param lock [in,out]: A lock to be handled
 * \param flags    [in]: flags
 * \param *data    [in]: ldlm_work_cp_ast_lock() will use ldlm_cb_set_arg
 *
 * \retval 0    : success
 * \retval <0   : failure
 */
int
ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
{
	struct file_lock		*getlk = lock->l_ast_data;
	struct obd_device	     *obd;
	struct obd_import	     *imp = NULL;
	struct ldlm_flock_wait_data     fwd;
	struct l_wait_info	    lwi;
	enum ldlm_error		    err;
	int			    rc = 0;

	CDEBUG(D_DLMTRACE, "flags: 0x%llx data: %p getlk: %p\n",
	       flags, data, getlk);

	/* Import invalidation. We need to actually release the lock
	 * references being held, so that it can go away. No point in
	 * holding the lock even if app still believes it has it, since
	 * server already dropped it anyway. Only for granted locks too.
	 */
	if ((lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) ==
	    (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) {
		if (lock->l_req_mode == lock->l_granted_mode &&
		    lock->l_granted_mode != LCK_NL && !data)
			ldlm_lock_decref_internal(lock, lock->l_req_mode);

		/* Need to wake up the waiter if we were evicted */
		wake_up(&lock->l_waitq);
		return 0;
	}

	LASSERT(flags != LDLM_FL_WAIT_NOREPROC);

	if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
		       LDLM_FL_BLOCK_CONV))) {
		if (!data)
			/* mds granted the lock in the reply */
			goto granted;
		/* CP AST RPC: lock get granted, wake it up */
		wake_up(&lock->l_waitq);
		return 0;
	}

	LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, sleeping");
	fwd.fwd_lock = lock;
	obd = class_exp2obd(lock->l_conn_export);

	/* if this is a local lock, there is no import */
	if (obd)
		imp = obd->u.cli.cl_import;

	if (imp) {
		/* snapshot the import generation under imp_lock; used to
		 * detect eviction/reconnect while we slept
		 */
		spin_lock(&imp->imp_lock);
		fwd.fwd_generation = imp->imp_generation;
		spin_unlock(&imp->imp_lock);
	}

	/* flock locks never time out (timeout 0); only interruption runs
	 * ldlm_flock_interrupted_wait
	 */
	lwi = LWI_TIMEOUT_INTR(0, NULL, ldlm_flock_interrupted_wait, &fwd);

	/* Go to sleep until the lock is granted. */
	rc = l_wait_event(lock->l_waitq, is_granted_or_cancelled(lock), &lwi);

	if (rc) {
		LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
			   rc);
		return rc;
	}

granted:
	OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10);

	if (ldlm_is_failed(lock)) {
		LDLM_DEBUG(lock, "client-side enqueue waking up: failed");
		return -EIO;
	}

	LDLM_DEBUG(lock, "client-side enqueue granted");

	lock_res_and_lock(lock);

	/*
	 * Protect against race where lock could have been just destroyed
	 * due to overlap in ldlm_process_flock_lock().
	 */
	if (ldlm_is_destroyed(lock)) {
		unlock_res_and_lock(lock);
		LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
		return 0;
	}

	/* ldlm_lock_enqueue() has already placed lock on the granted list. */
	list_del_init(&lock->l_res_link);

	if (ldlm_is_flock_deadlock(lock)) {
		LDLM_DEBUG(lock, "client-side enqueue deadlock received");
		rc = -EDEADLK;
	} else if (flags & LDLM_FL_TEST_LOCK) {
		/* fcntl(F_GETLK) request */
		/* The old mode was saved in getlk->fl_type so that if the mode
		 * in the lock changes we can decref the appropriate refcount.
		 */
		ldlm_flock_destroy(lock, getlk->fl_type, LDLM_FL_WAIT_NOREPROC);
		switch (lock->l_granted_mode) {
		case LCK_PR:
			getlk->fl_type = F_RDLCK;
			break;
		case LCK_PW:
			getlk->fl_type = F_WRLCK;
			break;
		default:
			/* no conflicting lock found */
			getlk->fl_type = F_UNLCK;
		}
		getlk->fl_pid = (pid_t)lock->l_policy_data.l_flock.pid;
		getlk->fl_start = (loff_t)lock->l_policy_data.l_flock.start;
		getlk->fl_end = (loff_t)lock->l_policy_data.l_flock.end;
	} else {
		__u64 noreproc = LDLM_FL_WAIT_NOREPROC;

		/* We need to reprocess the lock to do merges or splits
		 * with existing locks owned by this process.
		 */
		ldlm_process_flock_lock(lock, &noreproc, 1, &err, NULL);
	}
	unlock_res_and_lock(lock);
	return rc;
}
EXPORT_SYMBOL(ldlm_flock_completion_ast);
578
579 void ldlm_flock_policy_wire18_to_local(const ldlm_wire_policy_data_t *wpolicy,
580                                        ldlm_policy_data_t *lpolicy)
581 {
582         memset(lpolicy, 0, sizeof(*lpolicy));
583         lpolicy->l_flock.start = wpolicy->l_flock.lfw_start;
584         lpolicy->l_flock.end = wpolicy->l_flock.lfw_end;
585         lpolicy->l_flock.pid = wpolicy->l_flock.lfw_pid;
586         /* Compat code, old clients had no idea about owner field and
587          * relied solely on pid for ownership. Introduced in LU-104, 2.1,
588          * April 2011
589          */
590         lpolicy->l_flock.owner = wpolicy->l_flock.lfw_pid;
591 }
592
593 void ldlm_flock_policy_wire21_to_local(const ldlm_wire_policy_data_t *wpolicy,
594                                        ldlm_policy_data_t *lpolicy)
595 {
596         memset(lpolicy, 0, sizeof(*lpolicy));
597         lpolicy->l_flock.start = wpolicy->l_flock.lfw_start;
598         lpolicy->l_flock.end = wpolicy->l_flock.lfw_end;
599         lpolicy->l_flock.pid = wpolicy->l_flock.lfw_pid;
600         lpolicy->l_flock.owner = wpolicy->l_flock.lfw_owner;
601 }
602
603 void ldlm_flock_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
604                                      ldlm_wire_policy_data_t *wpolicy)
605 {
606         memset(wpolicy, 0, sizeof(*wpolicy));
607         wpolicy->l_flock.lfw_start = lpolicy->l_flock.start;
608         wpolicy->l_flock.lfw_end = lpolicy->l_flock.end;
609         wpolicy->l_flock.lfw_pid = lpolicy->l_flock.pid;
610         wpolicy->l_flock.lfw_owner = lpolicy->l_flock.owner;
611 }