4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2015, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
37 #define DEBUG_SUBSYSTEM S_LOV
39 #include "../../include/linux/libcfs/libcfs.h"
41 #include "../include/obd_class.h"
42 #include "../include/lustre/lustre_idl.h"
43 #include "lov_internal.h"
/*
 * Put a request set into its initial state: zero the completion, success
 * and finish-checked counters, clear the cookie pointer, start with an
 * empty request list, set the reference count to 1 and initialise the
 * wait queue that completion handlers wake.
 *
 * NOTE(review): this listing skips original lines (the embedded numbering
 * jumps from 45 to 48), so further fields may be initialised here as well.
 */
45 static void lov_init_set(struct lov_request_set *set)
48 atomic_set(&set->set_completes, 0);
49 atomic_set(&set->set_success, 0);
50 atomic_set(&set->set_finish_checked, 0);
51 set->set_cookies = NULL;
52 INIT_LIST_HEAD(&set->set_list);
53 atomic_set(&set->set_refcount, 1);
54 init_waitqueue_head(&set->set_waitq);
/*
 * Release the requests queued on a request set.  Every entry on
 * set->set_list is unlinked and its per-request buffers are released: the
 * obdo goes back to obdo_cachep and the statfs buffer is kfree()d.
 *
 * NOTE(review): several original lines are missing from this listing (the
 * rest of the list_entry() call, anything guarding the obdo free, and
 * whatever follows the last kfree()).  Freeing of the request structures
 * and of the set itself presumably happens here too -- confirm against the
 * full source.
 */
57 void lov_finish_set(struct lov_request_set *set)
59 struct list_head *pos, *n;
/* deletion-safe iteration: entries are unlinked inside the loop */
62 list_for_each_safe(pos, n, &set->set_list) {
63 struct lov_request *req = list_entry(pos,
66 list_del_init(&req->rq_link);
69 kmem_cache_free(obdo_cachep, req->rq_oi.oi_oa);
70 kfree(req->rq_oi.oi_osfs);
/*
 * Check whether every request of the set has completed, i.e. whether
 * set_completes has reached set_count.  When it has, set_finish_checked is
 * incremented and compared with 1, which is true only for the first caller
 * that gets this far.
 *
 * NOTE(review): the return statements and the use of the idempotent
 * argument are not part of this listing; presumably an idempotent caller
 * bypasses the first-caller test -- confirm against the full source.
 */
76 static int lov_set_finished(struct lov_request_set *set, int idempotent)
78 int completes = atomic_read(&set->set_completes);
80 CDEBUG(D_INFO, "check set %d/%d\n", completes, set->set_count);
82 if (completes == set->set_count) {
85 if (atomic_inc_return(&set->set_finish_checked) == 1)
/*
 * Account for the completion of one request in its set: bump
 * set_completes, bump set_success, and wake up anybody sleeping on
 * set_waitq.
 *
 * NOTE(review): lines are missing between the signature and the first
 * atomic_inc(), and right before the set_success increment.  The success
 * counter is presumably only bumped when rc == 0, and the request is
 * presumably stamped with its result first (common_attr_done() reads
 * req->rq_complete and req->rq_rc) -- confirm against the full source.
 */
91 static void lov_update_set(struct lov_request_set *set,
92 struct lov_request *req, int rc)
97 atomic_inc(&set->set_completes);
99 atomic_inc(&set->set_success);
101 wake_up(&set->set_waitq);
/*
 * Generic completion handler for one request of a set.  The result is
 * first accounted in the set through lov_update_set(); a non-zero rc is
 * then checked against the state of the target the request was sent to
 * (lov_tgts[req->rq_idx] missing or not active).
 *
 * The LOV device is reached through set->set_exp, so the set must have its
 * export filled in.
 *
 * NOTE(review): the statement controlled by the "inactive ost" test and
 * the final return are missing from this listing; going by the existing
 * comment the error is presumably forgiven in that case -- confirm.
 */
104 int lov_update_common_set(struct lov_request_set *set,
105 struct lov_request *req, int rc)
107 struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
109 lov_update_set(set, req, rc);
111 /* grace error on inactive ost */
112 if (rc && !(lov->lov_tgts[req->rq_idx] &&
113 lov->lov_tgts[req->rq_idx]->ltd_active))
116 /* FIXME in raid1 regime, should return 0 */
/*
 * Queue a request at the tail of the set's request list.
 *
 * NOTE(review): the lines after list_add_tail() are missing from this
 * listing.  Other code in this file tests set->set_count after adding
 * requests and follows req->rq_rqset back to the set, so both are
 * presumably maintained here -- confirm against the full source.
 */
120 static void lov_set_add_req(struct lov_request *req,
121 struct lov_request_set *set)
123 list_add_tail(&req->rq_link, &set->set_list);
/*
 * Wait condition used by lov_check_and_wait_active(): evaluated with
 * lov_lock held, it is non-zero when the target slot idx is empty, when
 * the target is active, or when the import behind the target's export has
 * already tried to connect.
 *
 * NOTE(review): the declaration of rc, one line in the middle of the
 * boolean expression (presumably the tgt->ltd_exp NULL check that opens
 * the parenthesis closed on the imp_connect_tried line) and the return
 * statement are missing from this listing.
 */
128 static int lov_check_set(struct lov_obd *lov, int idx)
131 struct lov_tgt_desc *tgt;
133 mutex_lock(&lov->lov_lock);
134 tgt = lov->lov_tgts[idx];
135 rc = !tgt || tgt->ltd_active ||
137 class_exp2cliimp(tgt->ltd_exp)->imp_connect_tried);
138 mutex_unlock(&lov->lov_lock);
143 /* Check if the OSC connection exists and is active.
144 * If the OSC has not yet had a chance to connect to the OST the first time,
145 * wait once for it to connect instead of returning an error.
 */
/*
 * lov:     LOV device whose target table (lov_tgts) is consulted.
 * ost_idx: index of the target in lov->lov_tgts.
 *
 * Three quick checks are made under lov_lock: no target in the slot, target
 * already active, or a connect attempt was already made on the target's
 * import.  Otherwise the lock is dropped and the caller sleeps until
 * lov_check_set() becomes true; the wait is built from obd_timeout seconds
 * and a 1-second interval.  Callers in this file treat a zero return as
 * "target inactive".
 *
 * NOTE(review): the rc declaration/assignments, the goto targets and the
 * return statements are missing from this listing, so the exact return
 * value of each branch cannot be read off here.  waitq is a local that
 * nothing visible wakes up, so the condition is presumably re-polled on
 * the 1-second interval.  If the missing lines look at tgt again after the
 * wait, that access runs without lov_lock -- confirm the target cannot be
 * removed in the meantime.
 */
147 static int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx)
149 wait_queue_head_t waitq;
150 struct l_wait_info lwi;
151 struct lov_tgt_desc *tgt;
154 mutex_lock(&lov->lov_lock);
156 tgt = lov->lov_tgts[ost_idx];
/* empty slot: nothing to wait for */
158 if (unlikely(!tgt)) {
/* common case: target is already active */
163 if (likely(tgt->ltd_active)) {
/* a connect was already attempted: do not wait a second time */
168 if (tgt->ltd_exp && class_exp2cliimp(tgt->ltd_exp)->imp_connect_tried) {
/* drop the lock before sleeping; lov_check_set() takes it on each poll */
173 mutex_unlock(&lov->lov_lock);
175 init_waitqueue_head(&waitq);
176 lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(obd_timeout),
177 cfs_time_seconds(1), NULL, NULL);
179 rc = l_wait_event(waitq, lov_check_set(lov, ost_idx), &lwi);
186 mutex_unlock(&lov->lov_lock);
/*
 * Merge the attributes returned by the individual stripe requests into the
 * caller's obdo (set->set_oi->oi_oa).
 *
 * A scratch obdo is allocated from obdo_cachep and lov_merge_attrs() is
 * applied for each request that completed without error and came back with
 * a non-zero o_valid; attrset is handed to lov_merge_attrs() by pointer
 * and is later compared with the stripe count.  When OBD_MD_FLEPOCH is
 * requested, a mismatch between lsm_stripe_count and attrset is reported
 * as an error.  Finally the caller's object id (o_oi) is carried over into
 * the scratch obdo, the scratch obdo is copied over the caller's obdo and
 * then released.
 *
 * NOTE(review): the declaration of tmp_oa, the early returns, the error
 * codes, the allocation-failure handling and the goto/label lines are
 * missing from this listing, and the "When we take attributes" comment
 * below is not terminated in it.
 */
190 static int common_attr_done(struct lov_request_set *set)
192 struct lov_request *req;
194 int rc = 0, attrset = 0;
/* nothing to merge into when the caller supplied no obdo */
196 if (!set->set_oi->oi_oa)
/* no request succeeded */
199 if (!atomic_read(&set->set_success))
/* scratch obdo that accumulates the merged per-stripe attributes */
202 tmp_oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
208 list_for_each_entry(req, &set->set_list, rq_link) {
/* filter on requests that completed and returned no error */
209 if (!req->rq_complete || req->rq_rc)
211 if (req->rq_oi.oi_oa->o_valid == 0) /* inactive stripe */
213 lov_merge_attrs(tmp_oa, req->rq_oi.oi_oa,
214 req->rq_oi.oi_oa->o_valid,
215 set->set_oi->oi_md, req->rq_stripe, &attrset);
218 CERROR("No stripes had valid attrs\n");
221 if ((set->set_oi->oi_oa->o_valid & OBD_MD_FLEPOCH) &&
222 (set->set_oi->oi_md->lsm_stripe_count != attrset)) {
223 /* When we take attributes of some epoch, we require all the
226 CERROR("Not all the stripes had valid attrs\n");
231 tmp_oa->o_oi = set->set_oi->oi_oa->o_oi;
232 memcpy(set->set_oi->oi_oa, tmp_oa, sizeof(*set->set_oi->oi_oa));
235 kmem_cache_free(obdo_cachep, tmp_oa);
/*
 * Finish a getattr request set.  The set must have its export filled in
 * (asserted).  If at least one request completed, the per-stripe results
 * are merged into the caller's obdo by common_attr_done() and its return
 * code is kept in rc.
 *
 * NOTE(review): the declaration of rc, any NULL check on set, the release
 * of the set and the return statement are missing from this listing.
 */
239 int lov_fini_getattr_set(struct lov_request_set *set)
245 LASSERT(set->set_exp);
246 if (atomic_read(&set->set_completes))
247 rc = common_attr_done(set);
254 /* The callback for osc_getattr_async that finalizes the request info when a
255 * response is received.
 */
/*
 * cookie: the struct obd_info embedded in a struct lov_request (rq_oi).
 * rc:     result of the request.
 *
 * Recovers the enclosing lov_request from the obd_info pointer and hands
 * the result to lov_update_common_set() for the set the request belongs
 * to; that function's return value is passed straight back.
 */
257 static int cb_getattr_update(void *cookie, int rc)
259 struct obd_info *oinfo = cookie;
260 struct lov_request *lovreq;
262 lovreq = container_of(oinfo, struct lov_request, rq_oi);
263 return lov_update_common_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build the request set for a getattr: one request per usable stripe of
 * oinfo->oi_md.
 *
 * exp:    export of the LOV device; its obd supplies the target table.
 * oinfo:  caller's request info (stripe metadata in oi_md, obdo in oi_oa).
 * reqset: output pointer for the new set (the store is not visible in this
 *         listing).
 *
 * For each stripe, dummy entries are filtered out and the target is probed
 * with lov_check_and_wait_active().  A request is then allocated, given a
 * private copy of the caller's obdo with o_oi replaced by the stripe
 * object's id, wired to cb_getattr_update() and queued on the set.
 *
 * NOTE(review): the declarations of rc and i, the set initialisation, all
 * allocation-failure handling, the continue/goto statements, the error
 * codes and the labels are missing from this listing.  The trailing call
 * to lov_fini_getattr_set() is presumably the error-path cleanup --
 * confirm against the full source.
 */
266 int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo,
267 struct lov_request_set **reqset)
269 struct lov_request_set *set;
270 struct lov_obd *lov = &exp->exp_obd->u.lov;
273 set = kzalloc(sizeof(*set), GFP_NOFS);
281 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
282 struct lov_oinfo *loi;
283 struct lov_request *req;
285 loi = oinfo->oi_md->lsm_oinfo[i];
286 if (lov_oinfo_is_dummy(loi))
/* zero return means the target is not usable right now */
289 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
290 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
291 if (oinfo->oi_oa->o_valid & OBD_MD_FLEPOCH) {
292 /* SOM requires all the OSTs to be active. */
299 req = kzalloc(sizeof(*req), GFP_NOFS);
306 req->rq_idx = loi->loi_ost_idx;
308 req->rq_oi.oi_oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
309 if (!req->rq_oi.oi_oa) {
/* private copy of the caller's obdo, retargeted at this stripe object */
314 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
315 sizeof(*req->rq_oi.oi_oa));
316 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
317 req->rq_oi.oi_cb_up = cb_getattr_update;
319 lov_set_add_req(req, set);
/* no request could be queued at all */
321 if (!set->set_count) {
328 lov_fini_getattr_set(set);
/*
 * Finish a destroy request set.  The set must have its export filled in
 * (asserted).  The branch taken when at least one request completed holds
 * only a FIXME in this listing.
 *
 * NOTE(review): any NULL check on set, the release of the set and the
 * return statement are missing from this listing.
 */
332 int lov_fini_destroy_set(struct lov_request_set *set)
336 LASSERT(set->set_exp);
337 if (atomic_read(&set->set_completes)) {
338 /* FIXME update qos data here */
/*
 * Build the request set for destroying the objects of a striped file: one
 * request per usable stripe of lsm.
 *
 * exp:    export of the LOV device; its obd supplies the target table.
 * oinfo:  caller's request info (its use is not visible in this listing;
 *         presumably stored in set->set_oi).
 * src_oa: obdo describing the object; copied into every request.
 * lsm:    stripe metadata to walk.
 * oti:    optional transaction info; when present and src_oa carries
 *         OBD_MD_FLCOOKIE, its log cookies are attached to the set.
 * reqset: output pointer for the new set (the store is not visible here).
 *
 * For each stripe, dummy entries are filtered out and the target is probed
 * with lov_check_and_wait_active().  A request is then allocated, given a
 * private copy of src_oa with o_oi replaced by the stripe object's id, and
 * queued on the set.
 *
 * NOTE(review): the declarations of rc and i, the set initialisation, all
 * allocation-failure handling, the continue/goto statements, the error
 * codes and the labels are missing from this listing.  The trailing call
 * to lov_fini_destroy_set() is presumably the error-path cleanup --
 * confirm against the full source.
 */
346 int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
347 struct obdo *src_oa, struct lov_stripe_md *lsm,
348 struct obd_trans_info *oti,
349 struct lov_request_set **reqset)
351 struct lov_request_set *set;
352 struct lov_obd *lov = &exp->exp_obd->u.lov;
355 set = kzalloc(sizeof(*set), GFP_NOFS);
362 set->set_oi->oi_md = lsm;
363 set->set_oi->oi_oa = src_oa;
364 if (oti && src_oa->o_valid & OBD_MD_FLCOOKIE)
365 set->set_cookies = oti->oti_logcookies;
367 for (i = 0; i < lsm->lsm_stripe_count; i++) {
368 struct lov_oinfo *loi;
369 struct lov_request *req;
371 loi = lsm->lsm_oinfo[i];
372 if (lov_oinfo_is_dummy(loi))
/* zero return means the target is not usable right now */
375 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
376 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
380 req = kzalloc(sizeof(*req), GFP_NOFS);
387 req->rq_idx = loi->loi_ost_idx;
389 req->rq_oi.oi_oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
390 if (!req->rq_oi.oi_oa) {
/* private copy of src_oa, retargeted at this stripe object */
395 memcpy(req->rq_oi.oi_oa, src_oa, sizeof(*req->rq_oi.oi_oa));
396 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
397 lov_set_add_req(req, set);
/* no request could be queued at all */
399 if (!set->set_count) {
406 lov_fini_destroy_set(set);
/*
 * Finish a setattr request set.  The set must have its export filled in
 * (asserted).  If at least one request completed, the per-stripe results
 * are merged into the caller's obdo by common_attr_done() and its return
 * code is kept in rc.
 *
 * NOTE(review): the declaration of rc, any NULL check on set, the release
 * of the set and the return statement are missing from this listing.
 */
410 int lov_fini_setattr_set(struct lov_request_set *set)
416 LASSERT(set->set_exp);
417 if (atomic_read(&set->set_completes)) {
418 rc = common_attr_done(set);
419 /* FIXME update qos data here */
/*
 * Completion handler for one setattr request.  The result is accounted in
 * the set through lov_update_set(), a non-zero rc is checked against the
 * state of the target the request went to, and the ctime/mtime/atime
 * values flagged valid in the reply obdo are stored in the lvb of the
 * stripe the request belongs to (lsm_oinfo[req->rq_stripe]).
 *
 * The LOV device and the stripe metadata are reached through
 * req->rq_rqset rather than through the set argument; the only caller in
 * this file, cb_setattr_update(), passes req->rq_rqset as set, so both
 * name the same set there.
 *
 * NOTE(review): the statement controlled by the "inactive ost" test, any
 * condition wrapping the timestamp updates (presumably rc == 0) and the
 * return statement are missing from this listing.
 */
426 int lov_update_setattr_set(struct lov_request_set *set,
427 struct lov_request *req, int rc)
429 struct lov_obd *lov = &req->rq_rqset->set_exp->exp_obd->u.lov;
430 struct lov_stripe_md *lsm = req->rq_rqset->set_oi->oi_md;
432 lov_update_set(set, req, rc);
434 /* grace error on inactive ost */
435 if (rc && !(lov->lov_tgts[req->rq_idx] &&
436 lov->lov_tgts[req->rq_idx]->ltd_active))
/* copy each timestamp the reply marks as valid into the stripe's lvb */
440 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLCTIME)
441 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_ctime =
442 req->rq_oi.oi_oa->o_ctime;
443 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLMTIME)
444 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_mtime =
445 req->rq_oi.oi_oa->o_mtime;
446 if (req->rq_oi.oi_oa->o_valid & OBD_MD_FLATIME)
447 lsm->lsm_oinfo[req->rq_stripe]->loi_lvb.lvb_atime =
448 req->rq_oi.oi_oa->o_atime;
454 /* The callback for osc_setattr_async that finalizes the request info when a
455 * response is received.
 */
/*
 * cookie: the struct obd_info embedded in a struct lov_request (rq_oi).
 * rc:     result of the request.
 *
 * Recovers the enclosing lov_request from the obd_info pointer and hands
 * the result to lov_update_setattr_set() for the set the request belongs
 * to; that function's return value is passed straight back.
 */
457 static int cb_setattr_update(void *cookie, int rc)
459 struct obd_info *oinfo = cookie;
460 struct lov_request *lovreq;
462 lovreq = container_of(oinfo, struct lov_request, rq_oi);
463 return lov_update_setattr_set(lovreq->rq_rqset, lovreq, rc);
/*
 * Build the request set for a setattr: one request per usable stripe of
 * oinfo->oi_md.
 *
 * exp:    export of the LOV device; its obd supplies the target table.
 * oinfo:  caller's request info (stripe metadata in oi_md, obdo in oi_oa).
 * oti:    optional transaction info; when present and the caller's obdo
 *         carries OBD_MD_FLCOOKIE, its log cookies are attached to the set.
 * reqset: output pointer for the new set (the store is not visible in this
 *         listing).
 *
 * For each stripe, dummy entries are filtered out and the target is probed
 * with lov_check_and_wait_active().  A request is then allocated and given
 * a private copy of the caller's obdo with o_oi replaced by the stripe
 * object's id and o_stripe_idx set to the stripe number; it is wired to
 * cb_setattr_update() and queued on the set.  When the caller sets a size
 * (OBD_MD_FLSIZE), the file size is translated into a per-stripe size.
 *
 * NOTE(review): the declarations of rc and i, the set initialisation, all
 * allocation-failure handling, the continue/goto statements, the error
 * codes and the labels are missing from this listing.  The trailing call
 * to lov_fini_setattr_set() is presumably the error-path cleanup --
 * confirm against the full source.
 */
466 int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
467 struct obd_trans_info *oti,
468 struct lov_request_set **reqset)
470 struct lov_request_set *set;
471 struct lov_obd *lov = &exp->exp_obd->u.lov;
474 set = kzalloc(sizeof(*set), GFP_NOFS);
481 if (oti && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
482 set->set_cookies = oti->oti_logcookies;
484 for (i = 0; i < oinfo->oi_md->lsm_stripe_count; i++) {
485 struct lov_oinfo *loi = oinfo->oi_md->lsm_oinfo[i];
486 struct lov_request *req;
488 if (lov_oinfo_is_dummy(loi))
/* zero return means the target is not usable right now */
491 if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
492 CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
496 req = kzalloc(sizeof(*req), GFP_NOFS);
502 req->rq_idx = loi->loi_ost_idx;
504 req->rq_oi.oi_oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
505 if (!req->rq_oi.oi_oa) {
/* private copy of the caller's obdo, retargeted at this stripe object */
510 memcpy(req->rq_oi.oi_oa, oinfo->oi_oa,
511 sizeof(*req->rq_oi.oi_oa));
512 req->rq_oi.oi_oa->o_oi = loi->loi_oi;
513 req->rq_oi.oi_oa->o_stripe_idx = i;
514 req->rq_oi.oi_cb_up = cb_setattr_update;
/*
 * lov_stripe_offset() stores the stripe-local value for the file size in
 * the request's o_size; a negative return with a non-zero result is
 * adjusted down by one.
 */
516 if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE) {
517 int off = lov_stripe_offset(oinfo->oi_md,
518 oinfo->oi_oa->o_size, i,
519 &req->rq_oi.oi_oa->o_size);
521 if (off < 0 && req->rq_oi.oi_oa->o_size)
522 req->rq_oi.oi_oa->o_size--;
524 CDEBUG(D_INODE, "stripe %d has size %llu/%llu\n",
525 i, req->rq_oi.oi_oa->o_size,
526 oinfo->oi_oa->o_size);
528 lov_set_add_req(req, set);
/* no request could be queued at all */
530 if (!set->set_count) {
537 lov_fini_setattr_set(set);
/*
 * LOV_U64_MAX is the all-ones 64-bit value, used as a "saturated" marker.
 * LOV_SUM_MAX(tot, add) detects wrap-around of the unsigned sum tot + add
 * (the wrapped result is smaller than tot) and pins tot at LOV_U64_MAX in
 * that case.  Both arguments are evaluated more than once, so they must be
 * free of side effects.
 *
 * NOTE(review): the remaining lines of the macro are missing from this
 * listing -- presumably a do { } while (0) wrapper and the branch that
 * performs the plain addition when no overflow occurs.
 */
541 #define LOV_U64_MAX ((__u64)~0ULL)
542 #define LOV_SUM_MAX(tot, add) \
544 if ((tot) + (add) < (tot)) \
545 (tot) = LOV_U64_MAX; \
/*
 * Post-process an aggregated statfs result and cache it in the LOV device.
 *
 * obd:  LOV device; receives the cached copy in obd_osfs.
 * osfs: aggregated statfs data, adjusted in place.
 * The third parameter is on a line missing from this listing; the caller
 * in this file passes the set's success count there.
 *
 * os_files and os_ffree are divided by the expected stripe count unless
 * they hold the saturated value LOV_U64_MAX.  The result is then copied to
 * obd->obd_osfs and stamped with the current time, both under
 * obd_osfs_lock.
 *
 * NOTE(review): the condition wrapping this work and the return statements
 * are missing from this listing.  No zero check on expected_stripes is
 * visible before the divisions -- confirm that lov_get_stripecnt() cannot
 * return 0.
 */
550 int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,
554 __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov,
/* a saturated counter stays saturated instead of being scaled */
556 if (osfs->os_files != LOV_U64_MAX)
557 lov_do_div64(osfs->os_files, expected_stripes);
558 if (osfs->os_ffree != LOV_U64_MAX)
559 lov_do_div64(osfs->os_ffree, expected_stripes);
/* publish data and timestamp together under the lock */
561 spin_lock(&obd->obd_osfs_lock);
562 memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
563 obd->obd_osfs_age = cfs_time_current_64();
564 spin_unlock(&obd->obd_osfs_lock);
/*
 * Finish a statfs request set.  If at least one request completed, the
 * aggregated result in set->set_oi->oi_osfs is post-processed and cached
 * by lov_fini_statfs(), which is given the number of successful requests.
 *
 * NOTE(review): the declaration of rc, any NULL check on set, the release
 * of the set and the return statement are missing from this listing.
 */
571 int lov_fini_statfs_set(struct lov_request_set *set)
578 if (atomic_read(&set->set_completes)) {
579 rc = lov_fini_statfs(set->set_obd, set->set_oi->oi_osfs,
580 atomic_read(&set->set_success));
/*
 * Fold the statfs reply of one target (lov_sfs) into the running
 * aggregate (osfs).
 *
 * The third parameter is on a line missing from this listing; the caller
 * passes the number of requests that had succeeded before this one.
 *
 * Visible behaviour: one path copies lov_sfs over osfs wholesale.  The
 * other path reconciles differing block sizes by shifting the block
 * counters of whichever side uses the smaller block size (the aggregate
 * adopts the larger os_bsize), then adds the block counters and adds the
 * file counters with saturation through LOV_SUM_MAX.  lov_sfs may be
 * modified (its block counters are shifted in place).
 *
 * NOTE(review): the third parameter, the declaration of tmp, the condition
 * selecting between the two paths and the body of the bit-scanning loop
 * are missing from this listing, and the "XXX not sure about this one"
 * comment is not terminated in it.
 *
 * NOTE(review): the loop bound "shift <= 64" lets shift reach 65 if the
 * (missing) loop body never breaks out, e.g. if one block size is 0 or
 * not a power of two.  Shifting by that amount is undefined behaviour in C
 * if the os_b* counters are 64-bit -- confirm against the full source and
 * consider resetting shift to 0 when the scan fails.
 */
586 static void lov_update_statfs(struct obd_statfs *osfs,
587 struct obd_statfs *lov_sfs,
590 int shift = 0, quit = 0;
594 memcpy(osfs, lov_sfs, sizeof(*lov_sfs));
596 if (osfs->os_bsize != lov_sfs->os_bsize) {
597 /* assume all block sizes are always powers of 2 */
598 /* get the bits difference */
599 tmp = osfs->os_bsize | lov_sfs->os_bsize;
600 for (shift = 0; shift <= 64; ++shift) {
/* aggregate uses the smaller block size: rescale it to the larger one */
611 if (osfs->os_bsize < lov_sfs->os_bsize) {
612 osfs->os_bsize = lov_sfs->os_bsize;
614 osfs->os_bfree >>= shift;
615 osfs->os_bavail >>= shift;
616 osfs->os_blocks >>= shift;
/* otherwise rescale the incoming reply instead */
617 } else if (shift != 0) {
618 lov_sfs->os_bfree >>= shift;
619 lov_sfs->os_bavail >>= shift;
620 lov_sfs->os_blocks >>= shift;
622 osfs->os_bfree += lov_sfs->os_bfree;
623 osfs->os_bavail += lov_sfs->os_bavail;
624 osfs->os_blocks += lov_sfs->os_blocks;
625 /* XXX not sure about this one - depends on policy.
626 * - could be minimum if we always stripe on all OBDs
627 * (but that would be wrong for any other policy,
628 * if one of the OBDs has no more objects left)
629 * - could be sum if we stripe whole objects
630 * - could be average, just to give a nice number
632 * To give a "reasonable" (if not wholly accurate)
633 * number, we divide the total number of free objects
634 * by expected stripe count (watch out for overflow).
636 LOV_SUM_MAX(osfs->os_files, lov_sfs->os_files);
637 LOV_SUM_MAX(osfs->os_ffree, lov_sfs->os_ffree);
641 /* The callback for osc_statfs_async that finalizes the request info when a
642 * response is received.
 */
/*
 * cookie: the struct obd_info embedded in a struct lov_request (rq_oi).
 * rc:     result of the statfs request.
 *
 * Recovers the request and its set, samples the set's success count
 * before this completion is accounted, and records the completion with
 * lov_update_set().  If the target at lovreq->rq_idx exists and is active,
 * the reply is cached in the target's obd (obd_osfs, plus obd_osfs_age
 * unless the reply came from cache) under that obd's obd_osfs_lock, and
 * the reply is folded into the set-wide aggregate with
 * lov_update_statfs().  When the set was issued with OBD_STATFS_PTLRPCD
 * and lov_set_finished() reports that this was the last completion,
 * lov_statfs_interpret() is called with a non-zero flag if not all
 * requests succeeded.
 *
 * NOTE(review): the declarations of lov and success, the rc handling after
 * lov_update_set(), the goto/label lines and the return statement are
 * missing from this listing, and the "XXX: the same is done" comment is
 * not terminated in it.
 */
644 static int cb_statfs_update(void *cookie, int rc)
646 struct obd_info *oinfo = cookie;
647 struct lov_request *lovreq;
648 struct lov_request_set *set;
649 struct obd_statfs *osfs, *lov_sfs;
651 struct lov_tgt_desc *tgt;
652 struct obd_device *lovobd, *tgtobd;
655 lovreq = container_of(oinfo, struct lov_request, rq_oi);
656 set = lovreq->rq_rqset;
657 lovobd = set->set_obd;
658 lov = &lovobd->u.lov;
/* osfs is the set-wide aggregate, lov_sfs this target's reply */
659 osfs = set->set_oi->oi_osfs;
660 lov_sfs = oinfo->oi_osfs;
/* sampled before lov_update_set() can count this request */
661 success = atomic_read(&set->set_success);
662 /* XXX: the same is done in lov_update_common_set, however
663 * lovset->set_exp is not initialized.
665 lov_update_set(set, lovreq, rc);
670 tgt = lov->lov_tgts[lovreq->rq_idx];
671 if (!tgt || !tgt->ltd_active)
674 tgtobd = class_exp2obd(tgt->ltd_exp);
675 spin_lock(&tgtobd->obd_osfs_lock);
676 memcpy(&tgtobd->obd_osfs, lov_sfs, sizeof(*lov_sfs));
677 if ((oinfo->oi_flags & OBD_STATFS_FROM_CACHE) == 0)
678 tgtobd->obd_osfs_age = cfs_time_current_64();
679 spin_unlock(&tgtobd->obd_osfs_lock);
682 lov_update_statfs(osfs, lov_sfs, success);
686 if (set->set_oi->oi_flags & OBD_STATFS_PTLRPCD &&
687 lov_set_finished(set, 0)) {
688 lov_statfs_interpret(NULL, set, set->set_count !=
689 atomic_read(&set->set_success));
/*
 * Build the request set for a statfs: one request per usable target of
 * the LOV device.
 *
 * obd:    LOV device whose targets are queried.
 * oinfo:  caller's request info; its oi_flags are copied into every
 *         request and control whether inactive targets are waited for.
 * reqset: output pointer for the new set (the store is not visible in this
 *         listing).
 *
 * Empty target slots are filtered out, as are inactive targets when
 * OBD_STATFS_NODELAY is set.  Without NODELAY an inactive target is given
 * a chance to connect through lov_check_and_wait_active(), whose result is
 * not used here.  Targets without an export are filtered out as well.
 * Each remaining target gets a request with its own zeroed statfs buffer,
 * wired to cb_statfs_update() and queued on the set.
 *
 * NOTE(review): the declarations of rc and i, the set initialisation, all
 * allocation-failure handling, the continue/goto statements, the error
 * codes and the labels are missing from this listing, and the "skip
 * targets" comment is not terminated in it.  The trailing call to
 * lov_fini_statfs_set() is presumably the error-path cleanup -- confirm
 * against the full source.
 */
695 int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
696 struct lov_request_set **reqset)
698 struct lov_request_set *set;
699 struct lov_obd *lov = &obd->u.lov;
702 set = kzalloc(sizeof(*set), GFP_NOFS);
710 /* We only get block data from the OBD */
711 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
712 struct lov_request *req;
714 if (!lov->lov_tgts[i] ||
715 (oinfo->oi_flags & OBD_STATFS_NODELAY &&
716 !lov->lov_tgts[i]->ltd_active)) {
717 CDEBUG(D_HA, "lov idx %d inactive\n", i);
/* return value deliberately unused: this only gives the target time to connect */
721 if (!lov->lov_tgts[i]->ltd_active)
722 lov_check_and_wait_active(lov, i);
724 /* skip targets that have been explicitly disabled by the
727 if (!lov->lov_tgts[i]->ltd_exp) {
728 CDEBUG(D_HA, "lov idx %d administratively disabled\n", i);
732 req = kzalloc(sizeof(*req), GFP_NOFS);
738 req->rq_oi.oi_osfs = kzalloc(sizeof(*req->rq_oi.oi_osfs),
740 if (!req->rq_oi.oi_osfs) {
747 req->rq_oi.oi_cb_up = cb_statfs_update;
748 req->rq_oi.oi_flags = oinfo->oi_flags;
750 lov_set_add_req(req, set);
752 if (!set->set_count) {
759 lov_fini_statfs_set(set);