~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/xfs/xfs_dquot.c

Version: ~ [ linux-5.9 ] ~ [ linux-5.8.14 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.70 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.150 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.200 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.238 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.238 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.85 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * Copyright (c) 2000-2003 Silicon Graphics, Inc.
  3  * All Rights Reserved.
  4  *
  5  * This program is free software; you can redistribute it and/or
  6  * modify it under the terms of the GNU General Public License as
  7  * published by the Free Software Foundation.
  8  *
  9  * This program is distributed in the hope that it would be useful,
 10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12  * GNU General Public License for more details.
 13  *
 14  * You should have received a copy of the GNU General Public License
 15  * along with this program; if not, write the Free Software Foundation,
 16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 17  */
 18 #include "xfs.h"
 19 #include "xfs_fs.h"
 20 #include "xfs_format.h"
 21 #include "xfs_log_format.h"
 22 #include "xfs_shared.h"
 23 #include "xfs_trans_resv.h"
 24 #include "xfs_bit.h"
 25 #include "xfs_mount.h"
 26 #include "xfs_defer.h"
 27 #include "xfs_inode.h"
 28 #include "xfs_bmap.h"
 29 #include "xfs_bmap_util.h"
 30 #include "xfs_alloc.h"
 31 #include "xfs_quota.h"
 32 #include "xfs_error.h"
 33 #include "xfs_trans.h"
 34 #include "xfs_buf_item.h"
 35 #include "xfs_trans_space.h"
 36 #include "xfs_trans_priv.h"
 37 #include "xfs_qm.h"
 38 #include "xfs_cksum.h"
 39 #include "xfs_trace.h"
 40 #include "xfs_log.h"
 41 #include "xfs_bmap_btree.h"
 42 
 43 /*
 44  * Lock order:
 45  *
 46  * ip->i_lock
 47  *   qi->qi_tree_lock
 48  *     dquot->q_qlock (xfs_dqlock() and friends)
 49  *       dquot->q_flush (xfs_dqflock() and friends)
 50  *       qi->qi_lru_lock
 51  *
 52  * If two dquots need to be locked the order is user before group/project,
 53  * otherwise by the lowest id first, see xfs_dqlock2.
 54  */
 55 
/*
 * NOTE(review): xfs_qm_dqtrxzone is not referenced in this part of the file;
 * presumably it backs per-transaction dquot bookkeeping - confirm at its users.
 */
struct kmem_zone                *xfs_qm_dqtrxzone;
/* Zone that incore dquots are allocated from and freed back to. */
static struct kmem_zone         *xfs_qm_dqzone;

/*
 * Lockdep classes assigned to the q_qlock of group and project dquots when
 * they are set up in xfs_qm_dqread(); user dquots keep the default class.
 */
static struct lock_class_key xfs_dquot_group_class;
static struct lock_class_key xfs_dquot_project_class;
 61 
 62 /*
 63  * This is called to free all the memory associated with a dquot
 64  */
 65 void
 66 xfs_qm_dqdestroy(
 67         xfs_dquot_t     *dqp)
 68 {
 69         ASSERT(list_empty(&dqp->q_lru));
 70 
 71         kmem_free(dqp->q_logitem.qli_item.li_lv_shadow);
 72         mutex_destroy(&dqp->q_qlock);
 73 
 74         XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot);
 75         kmem_zone_free(xfs_qm_dqzone, dqp);
 76 }
 77 
 78 /*
 79  * If default limits are in force, push them into the dquot now.
 80  * We overwrite the dquot limits only if they are zero and this
 81  * is not the root dquot.
 82  */
 83 void
 84 xfs_qm_adjust_dqlimits(
 85         struct xfs_mount        *mp,
 86         struct xfs_dquot        *dq)
 87 {
 88         struct xfs_quotainfo    *q = mp->m_quotainfo;
 89         struct xfs_disk_dquot   *d = &dq->q_core;
 90         struct xfs_def_quota    *defq;
 91         int                     prealloc = 0;
 92 
 93         ASSERT(d->d_id);
 94         defq = xfs_get_defquota(dq, q);
 95 
 96         if (defq->bsoftlimit && !d->d_blk_softlimit) {
 97                 d->d_blk_softlimit = cpu_to_be64(defq->bsoftlimit);
 98                 prealloc = 1;
 99         }
100         if (defq->bhardlimit && !d->d_blk_hardlimit) {
101                 d->d_blk_hardlimit = cpu_to_be64(defq->bhardlimit);
102                 prealloc = 1;
103         }
104         if (defq->isoftlimit && !d->d_ino_softlimit)
105                 d->d_ino_softlimit = cpu_to_be64(defq->isoftlimit);
106         if (defq->ihardlimit && !d->d_ino_hardlimit)
107                 d->d_ino_hardlimit = cpu_to_be64(defq->ihardlimit);
108         if (defq->rtbsoftlimit && !d->d_rtb_softlimit)
109                 d->d_rtb_softlimit = cpu_to_be64(defq->rtbsoftlimit);
110         if (defq->rtbhardlimit && !d->d_rtb_hardlimit)
111                 d->d_rtb_hardlimit = cpu_to_be64(defq->rtbhardlimit);
112 
113         if (prealloc)
114                 xfs_dquot_set_prealloc_limits(dq);
115 }
116 
117 /*
118  * Check the limits and timers of a dquot and start or reset timers
119  * if necessary.
120  * This gets called even when quota enforcement is OFF, which makes our
121  * life a little less complicated. (We just don't reject any quota
122  * reservations in that case, when enforcement is off).
123  * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
124  * enforcement's off.
125  * In contrast, warnings are a little different in that they don't
126  * 'automatically' get started when limits get exceeded.  They do
127  * get reset to zero, however, when we find the count to be under
128  * the soft limit (they are only ever set non-zero via userspace).
129  */
130 void
131 xfs_qm_adjust_dqtimers(
132         xfs_mount_t             *mp,
133         xfs_disk_dquot_t        *d)
134 {
135         ASSERT(d->d_id);
136 
137 #ifdef DEBUG
138         if (d->d_blk_hardlimit)
139                 ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
140                        be64_to_cpu(d->d_blk_hardlimit));
141         if (d->d_ino_hardlimit)
142                 ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
143                        be64_to_cpu(d->d_ino_hardlimit));
144         if (d->d_rtb_hardlimit)
145                 ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
146                        be64_to_cpu(d->d_rtb_hardlimit));
147 #endif
148 
149         if (!d->d_btimer) {
150                 if ((d->d_blk_softlimit &&
151                      (be64_to_cpu(d->d_bcount) >
152                       be64_to_cpu(d->d_blk_softlimit))) ||
153                     (d->d_blk_hardlimit &&
154                      (be64_to_cpu(d->d_bcount) >
155                       be64_to_cpu(d->d_blk_hardlimit)))) {
156                         d->d_btimer = cpu_to_be32(get_seconds() +
157                                         mp->m_quotainfo->qi_btimelimit);
158                 } else {
159                         d->d_bwarns = 0;
160                 }
161         } else {
162                 if ((!d->d_blk_softlimit ||
163                      (be64_to_cpu(d->d_bcount) <=
164                       be64_to_cpu(d->d_blk_softlimit))) &&
165                     (!d->d_blk_hardlimit ||
166                     (be64_to_cpu(d->d_bcount) <=
167                      be64_to_cpu(d->d_blk_hardlimit)))) {
168                         d->d_btimer = 0;
169                 }
170         }
171 
172         if (!d->d_itimer) {
173                 if ((d->d_ino_softlimit &&
174                      (be64_to_cpu(d->d_icount) >
175                       be64_to_cpu(d->d_ino_softlimit))) ||
176                     (d->d_ino_hardlimit &&
177                      (be64_to_cpu(d->d_icount) >
178                       be64_to_cpu(d->d_ino_hardlimit)))) {
179                         d->d_itimer = cpu_to_be32(get_seconds() +
180                                         mp->m_quotainfo->qi_itimelimit);
181                 } else {
182                         d->d_iwarns = 0;
183                 }
184         } else {
185                 if ((!d->d_ino_softlimit ||
186                      (be64_to_cpu(d->d_icount) <=
187                       be64_to_cpu(d->d_ino_softlimit)))  &&
188                     (!d->d_ino_hardlimit ||
189                      (be64_to_cpu(d->d_icount) <=
190                       be64_to_cpu(d->d_ino_hardlimit)))) {
191                         d->d_itimer = 0;
192                 }
193         }
194 
195         if (!d->d_rtbtimer) {
196                 if ((d->d_rtb_softlimit &&
197                      (be64_to_cpu(d->d_rtbcount) >
198                       be64_to_cpu(d->d_rtb_softlimit))) ||
199                     (d->d_rtb_hardlimit &&
200                      (be64_to_cpu(d->d_rtbcount) >
201                       be64_to_cpu(d->d_rtb_hardlimit)))) {
202                         d->d_rtbtimer = cpu_to_be32(get_seconds() +
203                                         mp->m_quotainfo->qi_rtbtimelimit);
204                 } else {
205                         d->d_rtbwarns = 0;
206                 }
207         } else {
208                 if ((!d->d_rtb_softlimit ||
209                      (be64_to_cpu(d->d_rtbcount) <=
210                       be64_to_cpu(d->d_rtb_softlimit))) &&
211                     (!d->d_rtb_hardlimit ||
212                      (be64_to_cpu(d->d_rtbcount) <=
213                       be64_to_cpu(d->d_rtb_hardlimit)))) {
214                         d->d_rtbtimer = 0;
215                 }
216         }
217 }
218 
219 /*
220  * initialize a buffer full of dquots and log the whole thing
221  */
222 STATIC void
223 xfs_qm_init_dquot_blk(
224         xfs_trans_t     *tp,
225         xfs_mount_t     *mp,
226         xfs_dqid_t      id,
227         uint            type,
228         xfs_buf_t       *bp)
229 {
230         struct xfs_quotainfo    *q = mp->m_quotainfo;
231         xfs_dqblk_t     *d;
232         xfs_dqid_t      curid;
233         int             i;
234 
235         ASSERT(tp);
236         ASSERT(xfs_buf_islocked(bp));
237 
238         d = bp->b_addr;
239 
240         /*
241          * ID of the first dquot in the block - id's are zero based.
242          */
243         curid = id - (id % q->qi_dqperchunk);
244         memset(d, 0, BBTOB(q->qi_dqchunklen));
245         for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
246                 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
247                 d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
248                 d->dd_diskdq.d_id = cpu_to_be32(curid);
249                 d->dd_diskdq.d_flags = type;
250                 if (xfs_sb_version_hascrc(&mp->m_sb)) {
251                         uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid);
252                         xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
253                                          XFS_DQUOT_CRC_OFF);
254                 }
255         }
256 
257         xfs_trans_dquot_buf(tp, bp,
258                             (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
259                             ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
260                              XFS_BLF_GDQUOT_BUF)));
261         xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
262 }
263 
264 /*
265  * Initialize the dynamic speculative preallocation thresholds. The lo/hi
266  * watermarks correspond to the soft and hard limits by default. If a soft limit
267  * is not specified, we use 95% of the hard limit.
268  */
269 void
270 xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
271 {
272         uint64_t space;
273 
274         dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
275         dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit);
276         if (!dqp->q_prealloc_lo_wmark) {
277                 dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark;
278                 do_div(dqp->q_prealloc_lo_wmark, 100);
279                 dqp->q_prealloc_lo_wmark *= 95;
280         }
281 
282         space = dqp->q_prealloc_hi_wmark;
283 
284         do_div(space, 100);
285         dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space;
286         dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
287         dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
288 }
289 
290 /*
291  * Allocate a block and fill it with dquots.
292  * This is called when the bmapi finds a hole.
293  */
294 STATIC int
295 xfs_qm_dqalloc(
296         xfs_trans_t     **tpp,
297         xfs_mount_t     *mp,
298         xfs_dquot_t     *dqp,
299         xfs_inode_t     *quotip,
300         xfs_fileoff_t   offset_fsb,
301         xfs_buf_t       **O_bpp)
302 {
303         xfs_fsblock_t   firstblock;
304         struct xfs_defer_ops dfops;
305         xfs_bmbt_irec_t map;
306         int             nmaps, error;
307         xfs_buf_t       *bp;
308         xfs_trans_t     *tp = *tpp;
309 
310         ASSERT(tp != NULL);
311 
312         trace_xfs_dqalloc(dqp);
313 
314         /*
315          * Initialize the bmap freelist prior to calling bmapi code.
316          */
317         xfs_defer_init(&dfops, &firstblock);
318         xfs_ilock(quotip, XFS_ILOCK_EXCL);
319         /*
320          * Return if this type of quotas is turned off while we didn't
321          * have an inode lock
322          */
323         if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
324                 xfs_iunlock(quotip, XFS_ILOCK_EXCL);
325                 return -ESRCH;
326         }
327 
328         xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
329         nmaps = 1;
330         error = xfs_bmapi_write(tp, quotip, offset_fsb,
331                                 XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
332                                 &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
333                                 &map, &nmaps, &dfops);
334         if (error)
335                 goto error0;
336         ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
337         ASSERT(nmaps == 1);
338         ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
339                (map.br_startblock != HOLESTARTBLOCK));
340 
341         /*
342          * Keep track of the blkno to save a lookup later
343          */
344         dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
345 
346         /* now we can just get the buffer (there's nothing to read yet) */
347         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
348                                dqp->q_blkno,
349                                mp->m_quotainfo->qi_dqchunklen,
350                                0);
351         if (!bp) {
352                 error = -ENOMEM;
353                 goto error1;
354         }
355         bp->b_ops = &xfs_dquot_buf_ops;
356 
357         /*
358          * Make a chunk of dquots out of this buffer and log
359          * the entire thing.
360          */
361         xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
362                               dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
363 
364         /*
365          * xfs_defer_finish() may commit the current transaction and
366          * start a second transaction if the freelist is not empty.
367          *
368          * Since we still want to modify this buffer, we need to
369          * ensure that the buffer is not released on commit of
370          * the first transaction and ensure the buffer is added to the
371          * second transaction.
372          *
373          * If there is only one transaction then don't stop the buffer
374          * from being released when it commits later on.
375          */
376 
377         xfs_trans_bhold(tp, bp);
378 
379         error = xfs_defer_finish(tpp, &dfops);
380         if (error)
381                 goto error1;
382 
383         /* Transaction was committed? */
384         if (*tpp != tp) {
385                 tp = *tpp;
386                 xfs_trans_bjoin(tp, bp);
387         } else {
388                 xfs_trans_bhold_release(tp, bp);
389         }
390 
391         *O_bpp = bp;
392         return 0;
393 
394 error1:
395         xfs_defer_cancel(&dfops);
396 error0:
397         xfs_iunlock(quotip, XFS_ILOCK_EXCL);
398 
399         return error;
400 }
401 
402 STATIC int
403 xfs_qm_dqrepair(
404         struct xfs_mount        *mp,
405         struct xfs_trans        *tp,
406         struct xfs_dquot        *dqp,
407         xfs_dqid_t              firstid,
408         struct xfs_buf          **bpp)
409 {
410         int                     error;
411         struct xfs_disk_dquot   *ddq;
412         struct xfs_dqblk        *d;
413         int                     i;
414 
415         /*
416          * Read the buffer without verification so we get the corrupted
417          * buffer returned to us. make sure we verify it on write, though.
418          */
419         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno,
420                                    mp->m_quotainfo->qi_dqchunklen,
421                                    0, bpp, NULL);
422 
423         if (error) {
424                 ASSERT(*bpp == NULL);
425                 return error;
426         }
427         (*bpp)->b_ops = &xfs_dquot_buf_ops;
428 
429         ASSERT(xfs_buf_islocked(*bpp));
430         d = (struct xfs_dqblk *)(*bpp)->b_addr;
431 
432         /* Do the actual repair of dquots in this buffer */
433         for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) {
434                 ddq = &d[i].dd_diskdq;
435                 error = xfs_dqcheck(mp, ddq, firstid + i,
436                                        dqp->dq_flags & XFS_DQ_ALLTYPES,
437                                        XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair");
438                 if (error) {
439                         /* repair failed, we're screwed */
440                         xfs_trans_brelse(tp, *bpp);
441                         return -EIO;
442                 }
443         }
444 
445         return 0;
446 }
447 
448 /*
449  * Maps a dquot to the buffer containing its on-disk version.
450  * This returns a ptr to the buffer containing the on-disk dquot
451  * in the bpp param, and a ptr to the on-disk dquot within that buffer
452  */
453 STATIC int
454 xfs_qm_dqtobp(
455         xfs_trans_t             **tpp,
456         xfs_dquot_t             *dqp,
457         xfs_disk_dquot_t        **O_ddpp,
458         xfs_buf_t               **O_bpp,
459         uint                    flags)
460 {
461         struct xfs_bmbt_irec    map;
462         int                     nmaps = 1, error;
463         struct xfs_buf          *bp;
464         struct xfs_inode        *quotip;
465         struct xfs_mount        *mp = dqp->q_mount;
466         xfs_dqid_t              id = be32_to_cpu(dqp->q_core.d_id);
467         struct xfs_trans        *tp = (tpp ? *tpp : NULL);
468         uint                    lock_mode;
469 
470         quotip = xfs_quota_inode(dqp->q_mount, dqp->dq_flags);
471         dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
472 
473         lock_mode = xfs_ilock_data_map_shared(quotip);
474         if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
475                 /*
476                  * Return if this type of quotas is turned off while we
477                  * didn't have the quota inode lock.
478                  */
479                 xfs_iunlock(quotip, lock_mode);
480                 return -ESRCH;
481         }
482 
483         /*
484          * Find the block map; no allocations yet
485          */
486         error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
487                                XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);
488 
489         xfs_iunlock(quotip, lock_mode);
490         if (error)
491                 return error;
492 
493         ASSERT(nmaps == 1);
494         ASSERT(map.br_blockcount == 1);
495 
496         /*
497          * Offset of dquot in the (fixed sized) dquot chunk.
498          */
499         dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
500                 sizeof(xfs_dqblk_t);
501 
502         ASSERT(map.br_startblock != DELAYSTARTBLOCK);
503         if (map.br_startblock == HOLESTARTBLOCK) {
504                 /*
505                  * We don't allocate unless we're asked to
506                  */
507                 if (!(flags & XFS_QMOPT_DQALLOC))
508                         return -ENOENT;
509 
510                 ASSERT(tp);
511                 error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
512                                         dqp->q_fileoffset, &bp);
513                 if (error)
514                         return error;
515                 tp = *tpp;
516         } else {
517                 trace_xfs_dqtobp_read(dqp);
518 
519                 /*
520                  * store the blkno etc so that we don't have to do the
521                  * mapping all the time
522                  */
523                 dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
524 
525                 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
526                                            dqp->q_blkno,
527                                            mp->m_quotainfo->qi_dqchunklen,
528                                            0, &bp, &xfs_dquot_buf_ops);
529 
530                 if (error == -EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) {
531                         xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff *
532                                                 mp->m_quotainfo->qi_dqperchunk;
533                         ASSERT(bp == NULL);
534                         error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp);
535                 }
536 
537                 if (error) {
538                         ASSERT(bp == NULL);
539                         return error;
540                 }
541         }
542 
543         ASSERT(xfs_buf_islocked(bp));
544         *O_bpp = bp;
545         *O_ddpp = bp->b_addr + dqp->q_bufoffset;
546 
547         return 0;
548 }
549 
550 
551 /*
552  * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
553  * and release the buffer immediately.
554  *
555  * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if it needed.
556  */
557 int
558 xfs_qm_dqread(
559         struct xfs_mount        *mp,
560         xfs_dqid_t              id,
561         uint                    type,
562         uint                    flags,
563         struct xfs_dquot        **O_dqpp)
564 {
565         struct xfs_dquot        *dqp;
566         struct xfs_disk_dquot   *ddqp;
567         struct xfs_buf          *bp;
568         struct xfs_trans        *tp = NULL;
569         int                     error;
570 
571         dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);
572 
573         dqp->dq_flags = type;
574         dqp->q_core.d_id = cpu_to_be32(id);
575         dqp->q_mount = mp;
576         INIT_LIST_HEAD(&dqp->q_lru);
577         mutex_init(&dqp->q_qlock);
578         init_waitqueue_head(&dqp->q_pinwait);
579 
580         /*
581          * Because we want to use a counting completion, complete
582          * the flush completion once to allow a single access to
583          * the flush completion without blocking.
584          */
585         init_completion(&dqp->q_flush);
586         complete(&dqp->q_flush);
587 
588         /*
589          * Make sure group quotas have a different lock class than user
590          * quotas.
591          */
592         switch (type) {
593         case XFS_DQ_USER:
594                 /* uses the default lock class */
595                 break;
596         case XFS_DQ_GROUP:
597                 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class);
598                 break;
599         case XFS_DQ_PROJ:
600                 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class);
601                 break;
602         default:
603                 ASSERT(0);
604                 break;
605         }
606 
607         XFS_STATS_INC(mp, xs_qm_dquot);
608 
609         trace_xfs_dqread(dqp);
610 
611         if (flags & XFS_QMOPT_DQALLOC) {
612                 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
613                                 XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
614                 if (error)
615                         goto error0;
616         }
617 
618         /*
619          * get a pointer to the on-disk dquot and the buffer containing it
620          * dqp already knows its own type (GROUP/USER).
621          */
622         error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags);
623         if (error) {
624                 /*
625                  * This can happen if quotas got turned off (ESRCH),
626                  * or if the dquot didn't exist on disk and we ask to
627                  * allocate (ENOENT).
628                  */
629                 trace_xfs_dqread_fail(dqp);
630                 goto error1;
631         }
632 
633         /* copy everything from disk dquot to the incore dquot */
634         memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
635         xfs_qm_dquot_logitem_init(dqp);
636 
637         /*
638          * Reservation counters are defined as reservation plus current usage
639          * to avoid having to add every time.
640          */
641         dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
642         dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
643         dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
644 
645         /* initialize the dquot speculative prealloc thresholds */
646         xfs_dquot_set_prealloc_limits(dqp);
647 
648         /* Mark the buf so that this will stay incore a little longer */
649         xfs_buf_set_ref(bp, XFS_DQUOT_REF);
650 
651         /*
652          * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
653          * So we need to release with xfs_trans_brelse().
654          * The strategy here is identical to that of inodes; we lock
655          * the dquot in xfs_qm_dqget() before making it accessible to
656          * others. This is because dquots, like inodes, need a good level of
657          * concurrency, and we don't want to take locks on the entire buffers
658          * for dquot accesses.
659          * Note also that the dquot buffer may even be dirty at this point, if
660          * this particular dquot was repaired. We still aren't afraid to
661          * brelse it because we have the changes incore.
662          */
663         ASSERT(xfs_buf_islocked(bp));
664         xfs_trans_brelse(tp, bp);
665 
666         if (tp) {
667                 error = xfs_trans_commit(tp);
668                 if (error)
669                         goto error0;
670         }
671 
672         *O_dqpp = dqp;
673         return error;
674 
675 error1:
676         if (tp)
677                 xfs_trans_cancel(tp);
678 error0:
679         xfs_qm_dqdestroy(dqp);
680         *O_dqpp = NULL;
681         return error;
682 }
683 
684 /*
685  * Advance to the next id in the current chunk, or if at the
686  * end of the chunk, skip ahead to first id in next allocated chunk
687  * using the SEEK_DATA interface.
688  */
689 static int
690 xfs_dq_get_next_id(
691         struct xfs_mount        *mp,
692         uint                    type,
693         xfs_dqid_t              *id)
694 {
695         struct xfs_inode        *quotip = xfs_quota_inode(mp, type);
696         xfs_dqid_t              next_id = *id + 1; /* simple advance */
697         uint                    lock_flags;
698         struct xfs_bmbt_irec    got;
699         struct xfs_iext_cursor  cur;
700         xfs_fsblock_t           start;
701         int                     error = 0;
702 
703         /* If we'd wrap past the max ID, stop */
704         if (next_id < *id)
705                 return -ENOENT;
706 
707         /* If new ID is within the current chunk, advancing it sufficed */
708         if (next_id % mp->m_quotainfo->qi_dqperchunk) {
709                 *id = next_id;
710                 return 0;
711         }
712 
713         /* Nope, next_id is now past the current chunk, so find the next one */
714         start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk;
715 
716         lock_flags = xfs_ilock_data_map_shared(quotip);
717         if (!(quotip->i_df.if_flags & XFS_IFEXTENTS)) {
718                 error = xfs_iread_extents(NULL, quotip, XFS_DATA_FORK);
719                 if (error)
720                         return error;
721         }
722 
723         if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &cur, &got)) {
724                 /* contiguous chunk, bump startoff for the id calculation */
725                 if (got.br_startoff < start)
726                         got.br_startoff = start;
727                 *id = got.br_startoff * mp->m_quotainfo->qi_dqperchunk;
728         } else {
729                 error = -ENOENT;
730         }
731 
732         xfs_iunlock(quotip, lock_flags);
733 
734         return error;
735 }
736 
737 /*
 738  * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
 739  * locked dquot, doing an allocation (if requested) as needed.
740  * When both an inode and an id are given, the inode's id takes precedence.
741  * That is, if the id changes while we don't hold the ilock inside this
742  * function, the new dquot is returned, not necessarily the one requested
743  * in the id argument.
744  */
745 int
746 xfs_qm_dqget(
747         xfs_mount_t     *mp,
748         xfs_inode_t     *ip,      /* locked inode (optional) */
749         xfs_dqid_t      id,       /* uid/projid/gid depending on type */
750         uint            type,     /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
751         uint            flags,    /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
752         xfs_dquot_t     **O_dqpp) /* OUT : locked incore dquot */
753 {
754         struct xfs_quotainfo    *qi = mp->m_quotainfo;
755         struct radix_tree_root *tree = xfs_dquot_tree(qi, type);
756         struct xfs_dquot        *dqp;
757         int                     error;
758 
759         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
760         if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
761             (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
762             (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
763                 return -ESRCH;
764         }
765 
766         ASSERT(type == XFS_DQ_USER ||
767                type == XFS_DQ_PROJ ||
768                type == XFS_DQ_GROUP);
769         if (ip) {
770                 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
771                 ASSERT(xfs_inode_dquot(ip, type) == NULL);
772         }
773 
774 restart:
775         mutex_lock(&qi->qi_tree_lock);
776         dqp = radix_tree_lookup(tree, id);
777         if (dqp) {
778                 xfs_dqlock(dqp);
779                 if (dqp->dq_flags & XFS_DQ_FREEING) {
780                         xfs_dqunlock(dqp);
781                         mutex_unlock(&qi->qi_tree_lock);
782                         trace_xfs_dqget_freeing(dqp);
783                         delay(1);
784                         goto restart;
785                 }
786 
787                 /* uninit / unused quota found in radix tree, keep looking  */
788                 if (flags & XFS_QMOPT_DQNEXT) {
789                         if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
790                                 xfs_dqunlock(dqp);
791                                 mutex_unlock(&qi->qi_tree_lock);
792                                 error = xfs_dq_get_next_id(mp, type, &id);
793                                 if (error)
794                                         return error;
795                                 goto restart;
796                         }
797                 }
798 
799                 dqp->q_nrefs++;
800                 mutex_unlock(&qi->qi_tree_lock);
801 
802                 trace_xfs_dqget_hit(dqp);
803                 XFS_STATS_INC(mp, xs_qm_dqcachehits);
804                 *O_dqpp = dqp;
805                 return 0;
806         }
807         mutex_unlock(&qi->qi_tree_lock);
808         XFS_STATS_INC(mp, xs_qm_dqcachemisses);
809 
810         /*
811          * Dquot cache miss. We don't want to keep the inode lock across
812          * a (potential) disk read. Also we don't want to deal with the lock
813          * ordering between quotainode and this inode. OTOH, dropping the inode
814          * lock here means dealing with a chown that can happen before
815          * we re-acquire the lock.
816          */
817         if (ip)
818                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
819 
820         error = xfs_qm_dqread(mp, id, type, flags, &dqp);
821 
822         if (ip)
823                 xfs_ilock(ip, XFS_ILOCK_EXCL);
824 
825         /* If we are asked to find next active id, keep looking */
826         if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) {
827                 error = xfs_dq_get_next_id(mp, type, &id);
828                 if (!error)
829                         goto restart;
830         }
831 
832         if (error)
833                 return error;
834 
835         if (ip) {
836                 /*
837                  * A dquot could be attached to this inode by now, since
838                  * we had dropped the ilock.
839                  */
840                 if (xfs_this_quota_on(mp, type)) {
841                         struct xfs_dquot        *dqp1;
842 
843                         dqp1 = xfs_inode_dquot(ip, type);
844                         if (dqp1) {
845                                 xfs_qm_dqdestroy(dqp);
846                                 dqp = dqp1;
847                                 xfs_dqlock(dqp);
848                                 goto dqret;
849                         }
850                 } else {
851                         /* inode stays locked on return */
852                         xfs_qm_dqdestroy(dqp);
853                         return -ESRCH;
854                 }
855         }
856 
857         mutex_lock(&qi->qi_tree_lock);
858         error = radix_tree_insert(tree, id, dqp);
859         if (unlikely(error)) {
860                 WARN_ON(error != -EEXIST);
861 
862                 /*
863                  * Duplicate found. Just throw away the new dquot and start
864                  * over.
865                  */
866                 mutex_unlock(&qi->qi_tree_lock);
867                 trace_xfs_dqget_dup(dqp);
868                 xfs_qm_dqdestroy(dqp);
869                 XFS_STATS_INC(mp, xs_qm_dquot_dups);
870                 goto restart;
871         }
872 
873         /*
874          * We return a locked dquot to the caller, with a reference taken
875          */
876         xfs_dqlock(dqp);
877         dqp->q_nrefs = 1;
878 
879         qi->qi_dquots++;
880         mutex_unlock(&qi->qi_tree_lock);
881 
882         /* If we are asked to find next active id, keep looking */
883         if (flags & XFS_QMOPT_DQNEXT) {
884                 if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
885                         xfs_qm_dqput(dqp);
886                         error = xfs_dq_get_next_id(mp, type, &id);
887                         if (error)
888                                 return error;
889                         goto restart;
890                 }
891         }
892 
893  dqret:
894         ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
895         trace_xfs_dqget_miss(dqp);
896         *O_dqpp = dqp;
897         return 0;
898 }
899 
900 /*
901  * Release a reference to the dquot (decrement ref-count) and unlock it.
902  *
903  * If there is a group quota attached to this dquot, carefully release that
904  * too without tripping over deadlocks'n'stuff.
905  */
906 void
907 xfs_qm_dqput(
908         struct xfs_dquot        *dqp)
909 {
910         ASSERT(dqp->q_nrefs > 0);
911         ASSERT(XFS_DQ_IS_LOCKED(dqp));
912 
913         trace_xfs_dqput(dqp);
914 
915         if (--dqp->q_nrefs == 0) {
916                 struct xfs_quotainfo    *qi = dqp->q_mount->m_quotainfo;
917                 trace_xfs_dqput_free(dqp);
918 
919                 if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
920                         XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused);
921         }
922         xfs_dqunlock(dqp);
923 }
924 
925 /*
926  * Release a dquot. Flush it if dirty, then dqput() it.
927  * dquot must not be locked.
928  */
929 void
930 xfs_qm_dqrele(
931         xfs_dquot_t     *dqp)
932 {
933         if (!dqp)
934                 return;
935 
936         trace_xfs_dqrele(dqp);
937 
938         xfs_dqlock(dqp);
939         /*
940          * We don't care to flush it if the dquot is dirty here.
941          * That will create stutters that we want to avoid.
942          * Instead we do a delayed write when we try to reclaim
943          * a dirty dquot. Also xfs_sync will take part of the burden...
944          */
945         xfs_qm_dqput(dqp);
946 }
947 
948 /*
949  * This is the dquot flushing I/O completion routine.  It is called
950  * from interrupt level when the buffer containing the dquot is
951  * flushed to disk.  It is responsible for removing the dquot logitem
952  * from the AIL if it has not been re-logged, and unlocking the dquot's
953  * flush lock. This behavior is very similar to that of inodes..
954  */
955 STATIC void
956 xfs_qm_dqflush_done(
957         struct xfs_buf          *bp,
958         struct xfs_log_item     *lip)
959 {
960         xfs_dq_logitem_t        *qip = (struct xfs_dq_logitem *)lip;
961         xfs_dquot_t             *dqp = qip->qli_dquot;
962         struct xfs_ail          *ailp = lip->li_ailp;
963 
964         /*
965          * We only want to pull the item from the AIL if its
966          * location in the log has not changed since we started the flush.
967          * Thus, we only bother if the dquot's lsn has
968          * not changed. First we check the lsn outside the lock
969          * since it's cheaper, and then we recheck while
970          * holding the lock before removing the dquot from the AIL.
971          */
972         if ((lip->li_flags & XFS_LI_IN_AIL) &&
973             ((lip->li_lsn == qip->qli_flush_lsn) ||
974              (lip->li_flags & XFS_LI_FAILED))) {
975 
976                 /* xfs_trans_ail_delete() drops the AIL lock. */
977                 spin_lock(&ailp->xa_lock);
978                 if (lip->li_lsn == qip->qli_flush_lsn) {
979                         xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
980                 } else {
981                         /*
982                          * Clear the failed state since we are about to drop the
983                          * flush lock
984                          */
985                         if (lip->li_flags & XFS_LI_FAILED)
986                                 xfs_clear_li_failed(lip);
987                         spin_unlock(&ailp->xa_lock);
988                 }
989         }
990 
991         /*
992          * Release the dq's flush lock since we're done with it.
993          */
994         xfs_dqfunlock(dqp);
995 }
996 
997 /*
998  * Write a modified dquot to disk.
999  * The dquot must be locked and the flush lock too taken by caller.
1000  * The flush lock will not be unlocked until the dquot reaches the disk,
1001  * but the dquot is free to be unlocked and modified by the caller
1002  * in the interim. Dquot is still locked on return. This behavior is
1003  * identical to that of inodes.
1004  */
1005 int
1006 xfs_qm_dqflush(
1007         struct xfs_dquot        *dqp,
1008         struct xfs_buf          **bpp)
1009 {
1010         struct xfs_mount        *mp = dqp->q_mount;
1011         struct xfs_buf          *bp;
1012         struct xfs_disk_dquot   *ddqp;
1013         int                     error;
1014 
1015         ASSERT(XFS_DQ_IS_LOCKED(dqp));
1016         ASSERT(!completion_done(&dqp->q_flush));
1017 
1018         trace_xfs_dqflush(dqp);
1019 
1020         *bpp = NULL;
1021 
1022         xfs_qm_dqunpin_wait(dqp);
1023 
1024         /*
1025          * This may have been unpinned because the filesystem is shutting
1026          * down forcibly. If that's the case we must not write this dquot
1027          * to disk, because the log record didn't make it to disk.
1028          *
1029          * We also have to remove the log item from the AIL in this case,
1030          * as we wait for an emptry AIL as part of the unmount process.
1031          */
1032         if (XFS_FORCED_SHUTDOWN(mp)) {
1033                 struct xfs_log_item     *lip = &dqp->q_logitem.qli_item;
1034                 dqp->dq_flags &= ~XFS_DQ_DIRTY;
1035 
1036                 xfs_trans_ail_remove(lip, SHUTDOWN_CORRUPT_INCORE);
1037 
1038                 error = -EIO;
1039                 goto out_unlock;
1040         }
1041 
1042         /*
1043          * Get the buffer containing the on-disk dquot
1044          */
1045         error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
1046                                    mp->m_quotainfo->qi_dqchunklen, 0, &bp,
1047                                    &xfs_dquot_buf_ops);
1048         if (error)
1049                 goto out_unlock;
1050 
1051         /*
1052          * Calculate the location of the dquot inside the buffer.
1053          */
1054         ddqp = bp->b_addr + dqp->q_bufoffset;
1055 
1056         /*
1057          * A simple sanity check in case we got a corrupted dquot..
1058          */
1059         error = xfs_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
1060                            XFS_QMOPT_DOWARN, "dqflush (incore copy)");
1061         if (error) {
1062                 xfs_buf_relse(bp);
1063                 xfs_dqfunlock(dqp);
1064                 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1065                 return -EIO;
1066         }
1067 
1068         /* This is the only portion of data that needs to persist */
1069         memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
1070 
1071         /*
1072          * Clear the dirty field and remember the flush lsn for later use.
1073          */
1074         dqp->dq_flags &= ~XFS_DQ_DIRTY;
1075 
1076         xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
1077                                         &dqp->q_logitem.qli_item.li_lsn);
1078 
1079         /*
1080          * copy the lsn into the on-disk dquot now while we have the in memory
1081          * dquot here. This can't be done later in the write verifier as we
1082          * can't get access to the log item at that point in time.
1083          *
1084          * We also calculate the CRC here so that the on-disk dquot in the
1085          * buffer always has a valid CRC. This ensures there is no possibility
1086          * of a dquot without an up-to-date CRC getting to disk.
1087          */
1088         if (xfs_sb_version_hascrc(&mp->m_sb)) {
1089                 struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp;
1090 
1091                 dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
1092                 xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
1093                                  XFS_DQUOT_CRC_OFF);
1094         }
1095 
1096         /*
1097          * Attach an iodone routine so that we can remove this dquot from the
1098          * AIL and release the flush lock once the dquot is synced to disk.
1099          */
1100         xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
1101                                   &dqp->q_logitem.qli_item);
1102 
1103         /*
1104          * If the buffer is pinned then push on the log so we won't
1105          * get stuck waiting in the write for too long.
1106          */
1107         if (xfs_buf_ispinned(bp)) {
1108                 trace_xfs_dqflush_force(dqp);
1109                 xfs_log_force(mp, 0);
1110         }
1111 
1112         trace_xfs_dqflush_done(dqp);
1113         *bpp = bp;
1114         return 0;
1115 
1116 out_unlock:
1117         xfs_dqfunlock(dqp);
1118         return -EIO;
1119 }
1120 
1121 /*
1122  * Lock two xfs_dquot structures.
1123  *
1124  * To avoid deadlocks we always lock the quota structure with
1125  * the lowerd id first.
1126  */
1127 void
1128 xfs_dqlock2(
1129         xfs_dquot_t     *d1,
1130         xfs_dquot_t     *d2)
1131 {
1132         if (d1 && d2) {
1133                 ASSERT(d1 != d2);
1134                 if (be32_to_cpu(d1->q_core.d_id) >
1135                     be32_to_cpu(d2->q_core.d_id)) {
1136                         mutex_lock(&d2->q_qlock);
1137                         mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
1138                 } else {
1139                         mutex_lock(&d1->q_qlock);
1140                         mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
1141                 }
1142         } else if (d1) {
1143                 mutex_lock(&d1->q_qlock);
1144         } else if (d2) {
1145                 mutex_lock(&d2->q_qlock);
1146         }
1147 }
1148 
1149 int __init
1150 xfs_qm_init(void)
1151 {
1152         xfs_qm_dqzone =
1153                 kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot");
1154         if (!xfs_qm_dqzone)
1155                 goto out;
1156 
1157         xfs_qm_dqtrxzone =
1158                 kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx");
1159         if (!xfs_qm_dqtrxzone)
1160                 goto out_free_dqzone;
1161 
1162         return 0;
1163 
1164 out_free_dqzone:
1165         kmem_zone_destroy(xfs_qm_dqzone);
1166 out:
1167         return -ENOMEM;
1168 }
1169 
1170 void
1171 xfs_qm_exit(void)
1172 {
1173         kmem_zone_destroy(xfs_qm_dqtrxzone);
1174         kmem_zone_destroy(xfs_qm_dqzone);
1175 }
1176 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp