[16] | 1 | /*************************************************************************** |
---|
| 2 | * |
---|
| 3 | * Copyright (C) 2001 International Business Machines |
---|
| 4 | * All rights reserved. |
---|
| 5 | * |
---|
| 6 | * This file is part of the GPFS mmfslinux kernel module. |
---|
| 7 | * |
---|
| 8 | * Redistribution and use in source and binary forms, with or without |
---|
| 9 | * modification, are permitted provided that the following conditions |
---|
| 10 | * are met: |
---|
| 11 | * |
---|
| 12 | * 1. Redistributions of source code must retain the above copyright notice, |
---|
| 13 | * this list of conditions and the following disclaimer. |
---|
| 14 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
| 15 | * notice, this list of conditions and the following disclaimer in the |
---|
| 16 | * documentation and/or other materials provided with the distribution. |
---|
| 17 | * 3. The name of the author may not be used to endorse or promote products |
---|
| 18 | * derived from this software without specific prior written |
---|
| 19 | * permission. |
---|
| 20 | * |
---|
| 21 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
---|
| 22 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
---|
| 23 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
---|
| 24 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
---|
| 25 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
---|
| 26 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
---|
| 27 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
---|
| 28 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
---|
| 29 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
---|
| 30 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
---|
| 31 | * |
---|
| 32 | *************************************************************************** */ |
---|
| 33 | /* @(#)94 1.48 src/avs/fs/mmfs/ts/kernext/gpl-linux/dir.c, mmfs, avs_rgpfs24, rgpfs240610b 11/8/05 10:20:56 */ |
---|
| 34 | |
---|
| 35 | #define __NO_VERSION__ |
---|
| 36 | |
---|
| 37 | #ifndef __KERNEL__ |
---|
| 38 | #define __KERNEL__ |
---|
| 39 | #endif |
---|
| 40 | |
---|
| 41 | #include <Shark-gpl.h> |
---|
| 42 | |
---|
| 43 | #include <linux/fs.h> |
---|
| 44 | #include <linux/sched.h> |
---|
| 45 | |
---|
| 46 | #include <linux2gpfs.h> |
---|
| 47 | #include <cxiSystem.h> |
---|
| 48 | #include <cxiTypes.h> |
---|
| 49 | #include <cxiIOBuffer.h> |
---|
| 50 | #include <cxiSharedSeg.h> |
---|
| 51 | #include <cxiCred.h> |
---|
| 52 | #include <cxi2gpfs.h> |
---|
| 53 | #include <Trace.h> |
---|
| 54 | #include <verdep.h> |
---|
| 55 | |
---|
| 56 | |
---|
| 57 | /* About dcache revalidation: |
---|
| 58 | |
---|
| 59 | The Linux directory cache (dcache) is used to cache the result of name |
---|
| 60 | lookups. Linux caches positive as well as negative lookup results in its |
---|
| 61 | dcache entries (struct dentry): if the file existed at the time the last |
---|
| 62 | lookup was done (positive lookup), dentry->d_inode will point to the struct |
---|
| 63 | inode of the file; if the file did not exist (negative lookup), |
---|
| 64 | dentry->d_inode will be null. |
---|
| 65 | |
---|
| 66 | When a directory is modified on the local node, Linux will update its |
---|
| 67 | dcache entries accordingly. When the directory is modified on another |
---|
| 68 | node, however, we need to invalidate local dcache entries: |
---|
| 69 | |
---|
| 70 | - A negative dcache entry becomes invalid when a file by the same name is |
---|
| 71 | created on another node. This requires an exclusive byte-range token on |
---|
| 72 | the directory block in which the lookup was done that resulted in the |
---|
| 73 | dcache entry. Hence, when we lose a byte-range token on a directory, we |
---|
| 74 | invalidate all negative dcache entries for lookups that were done in |
---|
| 75 | that directory. This is done by a call to kxinvalidateOSNode with |
---|
| 76 | KXIVO_NEGDCACHE, which will result in a call to |
---|
| 77 | cxiInvalidateNegDCacheEntry() implemented here. |
---|
| 78 | |
---|
| 79 | - A positive dcache entry becomes invalid when the file it refers to |
---|
| 80 | is deleted, moved, or renamed on another node. All of these operations |
---|
| 81 | require an exclusive inode lock. Hence we invalidate a positive dcache |
---|
| 82 | entry when we lose the inode token for the file. This more selective |
---|
| 83 | invalidation of positive dcache entries is more efficient than simply |
---|
| 84 | invalidating all dcache entries when we lose a byte-range token on the |
---|
| 85 | directory. The invalidation is done by a call to kxinvalidateOSNode |
---|
| 86 | with CXI_IC_DCACHE, which will result in a call to |
---|
| 87 | cxiInvalidateDCacheEntry() implemented here. |
---|
| 88 | |
---|
| 89 | To invalidate a dcache entry Linux defines a d_revalidate function in the |
---|
| 90 | dentry_operations table. This function is supposed to check whether the |
---|
| 91 | dcache entry is still valid and return 'true' or 'false' accordingly. |
---|
| 92 | If no d_revalidate function is given in the dentry_operations table, |
---|
| 93 | Linux assumes the dentry is valid. Hence the most efficient way |
---|
| 94 | of marking a dentry as valid or invalid is to have the d_op field in |
---|
| 95 | the dentry point to one of two different dentry_operations tables: |
---|
| 96 | one where the d_revalidate field is NULL (means the dentry is valid), |
---|
| 97 | and one where d_revalidate points at a function that always returns false |
---|
| 98 | (means the dentry is invalid). */ |
---|
| 99 | |
---|
| 100 | |
---|
| 101 | /* This call handles pruning off all unheld dentries pointing at an |
---|
| 102 | * inode. Normally pruning is not done by any daemon thread directly |
---|
| 103 | * (ie. token revoke) because d_prune_aliases may initiate a string of |
---|
| 104 | * callbacks due to iput. These callbacks may need to communicate back |
---|
| 105 | * to the daemon which can be problematic if there is a mailbox shortage. |
---|
| 106 | * Hence most dentry invalidation marks the cxiNode as needing a dentry |
---|
| 107 | * prune and the GPFS swapd is notified to call cxiPruneDCacheEntry in a |
---|
| 108 | * separate thread. |
---|
| 109 | * |
---|
| 110 | * Caller must be prepared to receive iput() callback into GPFS. |
---|
| 111 | * Caller must have a reference on cxiNode_t to ensure it doesn't |
---|
| 112 | * go away during processing. |
---|
| 113 | */ |
---|
| 114 | int |
---|
| 115 | cxiPruneDCacheEntry(cxiNode_t *cnP) |
---|
| 116 | { |
---|
| 117 | struct inode *iP = (struct inode *)cnP->osNodeP; |
---|
| 118 | struct list_head *dListP, *dHeadP; |
---|
| 119 | struct dentry *dentry; |
---|
| 120 | Boolean hasSubdirs = false; |
---|
| 121 | int refCount = 0; |
---|
| 122 | |
---|
| 123 | ENTER(0); |
---|
| 124 | TRACE2(TRACE_VNODE, 4, TRCID_PRUNE_DCACHE, |
---|
| 125 | "cxiPruneDCacheEntry: iP 0x%lX inode %d", iP, iP->i_ino); |
---|
| 126 | |
---|
| 127 | /* About to prune it so flag is no longer needed */ |
---|
| 128 | ClearCtFlag(cnP, pruneDCacheNeeded); |
---|
| 129 | |
---|
| 130 | /* This call prunes any unheld dentries pointing at the inode */ |
---|
| 131 | d_prune_aliases(iP); |
---|
| 132 | |
---|
| 133 | /* Traverse the list of all dentries that still refer to this file. */ |
---|
| 134 | dHeadP = &iP->i_dentry; |
---|
| 135 | spin_lock(&dcache_lock); |
---|
| 136 | for (dListP = dHeadP->next; dListP != dHeadP; dListP = dListP->next) |
---|
| 137 | { |
---|
| 138 | /* count dentries that still refer to this file */ |
---|
| 139 | refCount++; |
---|
| 140 | |
---|
| 141 | dentry = list_entry(dListP, struct dentry, d_alias); |
---|
| 142 | hasSubdirs = !list_empty(&dentry->d_subdirs); |
---|
| 143 | |
---|
| 144 | TRACE5N(TRACE_VNODE, 4, TRCID_PRUNE_DCACHE_ALIAS, |
---|
| 145 | "cxiPruneDCacheEntry: ip 0x%lX ino %d alias dentry 0x%lX " |
---|
| 146 | "hasSubdirs %d name '%s'", iP, iP->i_ino, dentry, |
---|
| 147 | hasSubdirs, dentry->d_name.name); |
---|
| 148 | |
---|
| 149 | /* Attempt to prune unused children. Helps keep stat cache manageable */ |
---|
| 150 | if (hasSubdirs) |
---|
| 151 | { |
---|
| 152 | dget_locked(dentry); |
---|
| 153 | spin_unlock(&dcache_lock); |
---|
| 154 | |
---|
| 155 | /* This call walks the tree starting at this parent dentry and |
---|
| 156 | * will successfully uncache child dentries that aren't held by |
---|
| 157 | * user programs and iput their associated inodes (resulting in |
---|
| 158 | * many cases of the inode i_count going to 0. iput() may however |
---|
| 159 | * just put these inodes on the unused list if they are still |
---|
| 160 | * valid (i_nlink > 0) and linked on i_hash. Thus in many cases while |
---|
| 161 | * the dentries immediately disappear their associated inode don't |
---|
| 162 | * have an immediate clear_inode() called on them. Subsequent |
---|
| 163 | * pruning (by kswapd) should shrink the icache for unused inodes |
---|
| 164 | * resulting in the gpfs_s_clear_inode callback for these inodes. |
---|
| 165 | */ |
---|
| 166 | shrink_dcache_parent(dentry); |
---|
| 167 | dput(dentry); |
---|
| 168 | |
---|
| 169 | /* For directories we don't support hard links so we shouldn't |
---|
| 170 | * have multiple dentries that need to be pruned. Hence |
---|
| 171 | * after having dropped the dcache lock we break out of this |
---|
| 172 | * for loop. |
---|
| 173 | */ |
---|
| 174 | break; |
---|
| 175 | } |
---|
| 176 | } |
---|
| 177 | if (!hasSubdirs) |
---|
| 178 | spin_unlock(&dcache_lock); |
---|
| 179 | |
---|
| 180 | EXIT(0); |
---|
| 181 | return refCount; |
---|
| 182 | } |
---|
| 183 | |
---|
| 184 | /* Mark the dentry as needing a revalidate. Called after losing |
---|
| 185 | * a token protecting the attributes of this dcache entry. |
---|
| 186 | */ |
---|
| 187 | int |
---|
| 188 | cxiInvalidateDCacheEntry(cxiNode_t *cnP) |
---|
| 189 | { |
---|
| 190 | struct inode *iP = (struct inode *)cnP->osNodeP; |
---|
| 191 | struct list_head *dListP, *dHeadP; |
---|
| 192 | struct dentry *dentry; |
---|
| 193 | int refCount = 0; |
---|
| 194 | |
---|
| 195 | /* Traverse the list of all dentries that refer to this file. */ |
---|
| 196 | ENTER(0); |
---|
| 197 | TRACE2(TRACE_VNODE, 4, TRCID_INVAL_DCACHE, |
---|
| 198 | "cxiInvalidateDCacheEntry: ip 0x%lX inode %d", iP, iP->i_ino); |
---|
| 199 | |
---|
| 200 | dHeadP = &iP->i_dentry; |
---|
| 201 | spin_lock(&dcache_lock); |
---|
| 202 | for (dListP = dHeadP->next; dListP != dHeadP; dListP = dListP->next) |
---|
| 203 | { |
---|
| 204 | refCount++; |
---|
| 205 | |
---|
| 206 | /* Mark the entry as needing revalidation by setting the d_op |
---|
| 207 | * function table to gpfs_dops_revalidate. Since this dentry |
---|
| 208 | * is staying in the vfs we can't declare it invalid, or a legitimate |
---|
| 209 | * stat on it may return ESTALE. |
---|
| 210 | * Scenario: node a) mkdir foo; cd foo |
---|
| 211 | * node b) chmod 500 foo |
---|
| 212 | * node a) ls -al |
---|
| 213 | * must succeed. |
---|
| 214 | */ |
---|
| 215 | dentry = list_entry(dListP, struct dentry, d_alias); |
---|
| 216 | dentry->d_op = &gpfs_dops_revalidate; |
---|
| 217 | |
---|
| 218 | TRACE4N(TRACE_VNODE, 4, TRCID_INVAL_DCACHE_ALIAS, |
---|
| 219 | "cxiInvalidateDCacheEntry: ip 0x%lX ino %d " |
---|
| 220 | "alias dentry 0x%lX name '%s'", |
---|
| 221 | iP, iP->i_ino, dentry, dentry->d_name.name); |
---|
| 222 | |
---|
| 223 | if (TestCtFlag(cnP, destroyIfDelInode)) |
---|
| 224 | { |
---|
| 225 | /* If the file was deleted, marking the dentry invalid is not |
---|
| 226 | * sufficient. If we leave the dentry in the cache marked as |
---|
| 227 | * invalid, it will remain in the cache until: |
---|
| 228 | * |
---|
| 229 | * a) if it has a zero d_count then the scheduled GPFS swapd |
---|
| 230 | * d_prune_aliases will get rid of it |
---|
| 231 | * b) if it has a nonzero d_count (it's open) then d_prune_aliases |
---|
| 232 | * would not prune it and it would stay in the cache until the |
---|
| 233 | * next lookup finds it and calls d_invalidate, which might not |
---|
| 234 | * ever happen. |
---|
| 235 | * |
---|
| 236 | * Thus we drop the dentry and the final close or schedule |
---|
| 237 | * d_prune_aliases will remove it. |
---|
| 238 | */ |
---|
| 239 | DENTRY_DROP(dentry); |
---|
| 240 | } |
---|
| 241 | |
---|
| 242 | /* Dentries for this cxiNode_t should be pruned by GPFS swapd thread |
---|
| 243 | * which will be signalled by the caller of this routine. |
---|
| 244 | */ |
---|
| 245 | SetCtFlag(cnP, pruneDCacheNeeded); |
---|
| 246 | } |
---|
| 247 | spin_unlock(&dcache_lock); |
---|
| 248 | |
---|
| 249 | EXIT(0); |
---|
| 250 | return refCount; |
---|
| 251 | } |
---|
| 252 | |
---|
| 253 | /* The following function is called to remove invalid dcache entries for a |
---|
| 254 | file when the file is deleted on this node. |
---|
| 255 | Such invalid dcache entries occur when a file is renamed on another node |
---|
| 256 | before it is deleted here. The rename revokes the inode token, which marks |
---|
| 257 | the dcache entry invalid, but does not remove it from the cache on this |
---|
| 258 | node. When the file is deleted, the delete operation on this node will |
---|
| 259 | look up the file under its new name and turn the (new) dcache entry into a |
---|
| 260 | negative dcache entry, but since the file was renamed, it will not find or |
---|
| 261 | process the old, invalid dcache entry (the one referring to the old file |
---|
| 262 | name). This function is called during delete (when the link count goes to |
---|
| 263 | zero) to remove old, invalid dcache entries, so the file can be destroyed. |
---|
| 264 | The function is similar to cxiInvalidateDCacheEntry, with the following |
---|
| 265 | differences: (1) it is only called on files that are being deleted (link |
---|
| 266 | count zero and destroyIfDelInode flag already set), (2) it does not mark |
---|
| 267 | any dcache entries as invalid; instead, it (3) only drops dcache entries |
---|
| 268 | that are already marked as invalid. In particular, we do not want to |
---|
| 269 | invalidate the dcache entry referring to the current name being unlinked, |
---|
| 270 | because unlink will turn this into a valid, negative dcache entry. */ |
---|
| 271 | void |
---|
| 272 | cxiDropInvalidDCacheEntry(cxiNode_t *cnP) |
---|
| 273 | { |
---|
| 274 | struct inode *iP = (struct inode *)cnP->osNodeP; |
---|
| 275 | struct list_head *dListP, *dHeadP; |
---|
| 276 | struct dentry *dentry; |
---|
| 277 | int holdCount; |
---|
| 278 | |
---|
| 279 | ENTER(0); |
---|
| 280 | TRACE2(TRACE_VNODE, 4, TRCID_DROP_INVAL_DCACHE, |
---|
| 281 | "cxiDropInvalidDCacheEntry: iP 0x%lX i_ino %d", |
---|
| 282 | iP, iP->i_ino); |
---|
| 283 | |
---|
| 284 | DBGASSERT(TestCtFlag(cnP, destroyIfDelInode)); |
---|
| 285 | |
---|
| 286 | /* Traverse the list of all dentries that still refer to this file. */ |
---|
| 287 | dHeadP = &iP->i_dentry; |
---|
| 288 | spin_lock(&dcache_lock); |
---|
| 289 | for (dListP = dHeadP->next; dListP != dHeadP; dListP = dListP->next) |
---|
| 290 | { |
---|
| 291 | /* Check whether this dentry mas been marked invalid */ |
---|
| 292 | dentry = list_entry(dListP, struct dentry, d_alias); |
---|
| 293 | if (dentry->d_op == &gpfs_dops_invalid || |
---|
| 294 | dentry->d_op == &gpfs_dops_revalidate) |
---|
| 295 | { |
---|
| 296 | TRACE4N(TRACE_VNODE, 4, TRCID_DROP_INVAL_DCACHE_ALIAS, |
---|
| 297 | "cxiDropInvalidDCacheEntry: ip 0x%lX ino %d " |
---|
| 298 | "removing dentry 0x%lX name '%s'\n", |
---|
| 299 | iP, iP->i_ino, dentry, dentry->d_name.name); |
---|
| 300 | |
---|
| 301 | /* Drop the dcache entry. See details in cxiInvalidateDCacheEntry */ |
---|
| 302 | DENTRY_DROP(dentry); |
---|
| 303 | |
---|
| 304 | /* Dentries for this cxiNode_t should be pruned */ |
---|
| 305 | SetCtFlag(cnP, pruneDCacheNeeded); |
---|
| 306 | } |
---|
| 307 | } |
---|
| 308 | spin_unlock(&dcache_lock); |
---|
| 309 | EXIT(0); |
---|
| 310 | } |
---|
| 311 | |
---|
| 312 | /* The following function is called to invalidate negative dcache entries for |
---|
| 313 | all files in a directory when we lose the BR token for the directory. */ |
---|
| 314 | int |
---|
| 315 | cxiInvalidateNegDCacheEntry(cxiNode_t *cnP) |
---|
| 316 | { |
---|
| 317 | struct inode *iP = (struct inode *)cnP->osNodeP; |
---|
| 318 | struct list_head *dListP, *dHeadP; |
---|
| 319 | struct list_head *cListP, *cHeadP; |
---|
| 320 | struct dentry *dentry, *child; |
---|
| 321 | int refCount = 0; |
---|
| 322 | |
---|
| 323 | ENTER(0); |
---|
| 324 | TRACE2(TRACE_VNODE, 4, TRCID_INVAL_NEG_DCACHE, |
---|
| 325 | "cxiInvalidateNegDCacheEntry: iP 0x%lX inode %d", |
---|
| 326 | iP, iP->i_ino); |
---|
| 327 | |
---|
| 328 | /* Traverse the list of all dentries that refer to this directory. |
---|
| 329 | Note: since we don't support hard links to directories, we expect |
---|
| 330 | there to be exactly one dentry on this list. */ |
---|
| 331 | dHeadP = &iP->i_dentry; |
---|
| 332 | spin_lock(&dcache_lock); |
---|
| 333 | for (dListP = dHeadP->next; dListP != dHeadP; dListP = dListP->next) |
---|
| 334 | { |
---|
| 335 | refCount++; |
---|
| 336 | |
---|
| 337 | /* traverse the list of all children of this dentry */ |
---|
| 338 | dentry = list_entry(dListP, struct dentry, d_alias); |
---|
| 339 | cHeadP = &dentry->d_subdirs; |
---|
| 340 | for (cListP = cHeadP->next; cListP != cHeadP; cListP = cListP->next) |
---|
| 341 | { |
---|
| 342 | /* If this child is a negative dentry (d_inode pointer is NULL), |
---|
| 343 | mark the entry invalid by setting the dop function table to |
---|
| 344 | gpfs_dops_invalid, which contains a d_revalidate function that |
---|
| 345 | always returns false. Also handle dcache entries that are |
---|
| 346 | about to be deleted (unlink operation pending but not yet complete). |
---|
| 347 | These entries still have a non-null d_inode pointer, but are |
---|
| 348 | marked as "delete pending" by having a different d_op table. |
---|
| 349 | We should not mark the latter as invalid, because we don't know |
---|
| 350 | yet whether the delete operation is going to succeed, so we mark |
---|
| 351 | those dentries as "needing revalidation". (see also comments |
---|
| 352 | in gpfs_i_unlink and gpfs_i_rmdir). */ |
---|
| 353 | child = list_entry(cListP, struct dentry, d_child); |
---|
| 354 | if (!child->d_inode || child->d_op == &gpfs_dops_ddeletepending) |
---|
| 355 | { |
---|
| 356 | child->d_op = !child->d_inode ? |
---|
| 357 | &gpfs_dops_invalid : &gpfs_dops_revalidate; |
---|
| 358 | |
---|
| 359 | TRACE5N(TRACE_VNODE, 4, TRCID_INVAL_NEG_DUNCACHE, |
---|
| 360 | "cxiInvalidateNegDCacheEntry: ip 0x%lX ino %d " |
---|
| 361 | "%s dentry 0x%lX name '%s'", |
---|
| 362 | iP, iP->i_ino, |
---|
| 363 | !child->d_inode ? "inval" : "reval", |
---|
| 364 | child, child->d_name.name); |
---|
| 365 | } |
---|
| 366 | } |
---|
| 367 | } |
---|
| 368 | spin_unlock(&dcache_lock); |
---|
| 369 | |
---|
| 370 | EXIT(0); |
---|
| 371 | return refCount; |
---|
| 372 | } |
---|
| 373 | |
---|
| 374 | /* dentry_operations */ |
---|
| 375 | |
---|
| 376 | /* The d_revalidate function is expected to check whether the directory entry |
---|
| 377 | * cached in the given dentry struct is still valid. |
---|
| 378 | */ |
---|
| 379 | int |
---|
| 380 | #if LINUX_KERNEL_VERSION >= 2060000 |
---|
| 381 | gpfs_d_invalid(struct dentry *dentry, struct nameidata *ni) |
---|
| 382 | #else |
---|
| 383 | gpfs_d_invalid(struct dentry *dentry, int flags) |
---|
| 384 | #endif |
---|
| 385 | { |
---|
| 386 | TRACE3(TRACE_VNODE, 4, TRCID_DIR_001, |
---|
| 387 | "gpfs_d_invalid: dentry 0x%lX d_inode 0x%lX name '%s' is invalid", |
---|
| 388 | dentry, dentry->d_inode, dentry->d_name.name); |
---|
| 389 | return false; |
---|
| 390 | } |
---|
| 391 | |
---|
| 392 | int |
---|
| 393 | #if LINUX_KERNEL_VERSION >= 2060000 |
---|
| 394 | gpfs_d_revalidate(struct dentry *dentry, struct nameidata *ni) |
---|
| 395 | #else |
---|
| 396 | gpfs_d_revalidate(struct dentry *dentry, int flags) |
---|
| 397 | #endif |
---|
| 398 | { |
---|
| 399 | int rc; |
---|
| 400 | cxiNode_t *dcnP; |
---|
| 401 | cxiNode_t *cnP = NULL; |
---|
| 402 | struct inode *diP; |
---|
| 403 | struct inode *newInodeP; |
---|
| 404 | struct gpfsVfsData_t *privVfsP; |
---|
| 405 | cxiIno_t iNum = (cxiIno_t)-1; |
---|
| 406 | ext_cred_t eCred; |
---|
| 407 | struct dentry *retP; |
---|
| 408 | |
---|
| 409 | ENTER(0); |
---|
| 410 | TRACE6(TRACE_VNODE, 4, TRCID_DIR_REVALIDATE, |
---|
| 411 | "gpfs_d_revalidate enter: dentry 0x%lX " |
---|
| 412 | "d_inode 0x%lX inum %d parent 0x%lX cwd 0x%lX d_name '%s'", |
---|
| 413 | dentry, dentry->d_inode, |
---|
| 414 | dentry->d_inode ? dentry->d_inode->i_ino : -1, |
---|
| 415 | dentry->d_parent, current->fs->pwd, dentry->d_name.name); |
---|
| 416 | |
---|
| 417 | rc = gpfs_i_revalidate(dentry); |
---|
| 418 | |
---|
| 419 | /* We're going to need to revalidate this according to its name. |
---|
| 420 | * The scenario that caused us problems is: |
---|
| 421 | * |
---|
| 422 | * Node a) mkdir dir1; touch dir1/file1 |
---|
| 423 | * Node b) mv dir1 dir2 |
---|
| 424 | * Node a) ls -al dir1 |
---|
| 425 | * |
---|
| 426 | * This code used to just revalidate the inode (gpfs_i_revalidate) |
---|
| 427 | * which would succeed since the dir1 inode is indeed still valid. |
---|
| 428 | * However its name has now changed to dir2 and thus this lookup |
---|
| 429 | * with its last known name is performed. We don't perform this |
---|
| 430 | * lookup for the root inode. We didn't have to do this before |
---|
| 431 | * RH 2.4.18-5 (unusual fix for NFS is in that kernel) but now |
---|
| 432 | * we have to go thru these machinations. Most of this is a |
---|
| 433 | * tradeoff and doesn't give exactly correct semantics. |
---|
| 434 | * |
---|
| 435 | * For instance normally on a local node directory rename the dentry |
---|
| 436 | * gets moved over to its new position via d_move. However in our |
---|
| 437 | * case we don't know what the new name is since we've just lost |
---|
| 438 | * the token and have no other info. If a process is sitting in this |
---|
| 439 | * renamed directory structure then it has to remain valid for that |
---|
| 440 | * process but none other. We unhash the directory so no other |
---|
| 441 | * process can step into that subtree but continue to say its valid |
---|
| 442 | * if (d_count > 1). At that point the only process calling d_revalidate |
---|
| 443 | * would be a process with it's current working directory in that |
---|
| 444 | * subtree. However this breaks down if the process needs to back |
---|
| 445 | * up into a parent directory, since d_revalidate starts from outside |
---|
| 446 | * the renamed subtree and can't proceed into the unhashed directory. |
---|
| 447 | * Thus you get an odd |
---|
| 448 | * getcwd: cannot access parent directories: No such file or directory |
---|
| 449 | * ancillary message but you can cd backwards correctly. |
---|
| 450 | * |
---|
| 451 | * Another idea attempted was looking at the process' cwd in the |
---|
| 452 | * task struct and answering whether the dentry was valid on a |
---|
| 453 | * per process basis. This gave odd semantics because a process |
---|
| 454 | * could list the parent directory and not see the renamed child |
---|
| 455 | * but could still cd into it (because it was still hashed). That |
---|
| 456 | * breaks down completely if another node makes a directory of the |
---|
| 457 | * old name in the parent. |
---|
| 458 | * |
---|
| 459 | * So if we can't use d_move...which it doesn't appear possible to |
---|
| 460 | * do, at a minimum you have to unhash the directory if it no |
---|
| 461 | * longer has the correct name or inode. |
---|
| 462 | * |
---|
| 463 | * Note that once a process steps out of the renamed dentry then |
---|
| 464 | * the final dput will kill the dentry. |
---|
| 465 | */ |
---|
| 466 | if (rc == 0 && dentry->d_inode->i_ino != INODENUM_ROOTDIR_FILE) |
---|
| 467 | { |
---|
| 468 | setCred(&eCred); |
---|
| 469 | |
---|
| 470 | privVfsP = VP_TO_PVP(dentry->d_inode); |
---|
| 471 | DBGASSERT(privVfsP != NULL); |
---|
| 472 | LOGASSERT(dentry->d_parent != NULL); |
---|
| 473 | |
---|
| 474 | diP = dentry->d_parent->d_inode; |
---|
| 475 | dcnP = VP_TO_CNP(diP); |
---|
| 476 | |
---|
| 477 | rc = gpfs_ops.gpfsLookup(privVfsP, (void *)diP, dcnP, |
---|
| 478 | NULL, (char *)dentry->d_name.name, |
---|
| 479 | (void **)&newInodeP, &cnP, &iNum, NULL, |
---|
| 480 | NULL, &eCred, (void **)&retP); |
---|
| 481 | if (rc == 0) |
---|
| 482 | { |
---|
| 483 | iput(newInodeP); |
---|
| 484 | |
---|
| 485 | if (iNum != dentry->d_inode->i_ino) |
---|
| 486 | rc = ESTALE; |
---|
| 487 | } |
---|
| 488 | |
---|
| 489 | /* The name is either no longer valid or has been renamed |
---|
| 490 | * and recreated with a different inode. We need to drop |
---|
| 491 | * the dentry from the hash list so another process can't |
---|
| 492 | * proceed into that tree. |
---|
| 493 | */ |
---|
| 494 | if (rc) |
---|
| 495 | { |
---|
| 496 | DENTRY_D_DROP(dentry); |
---|
| 497 | |
---|
| 498 | d_prune_aliases(dentry->d_inode); |
---|
| 499 | |
---|
| 500 | /* If the dentry still has processes sitting underneath |
---|
| 501 | * it we'll still claim its valid. |
---|
| 502 | */ |
---|
| 503 | if (atomic_read(&dentry->d_count) > 1) |
---|
| 504 | rc = 0; |
---|
| 505 | } |
---|
| 506 | } |
---|
| 507 | |
---|
| 508 | xerror: |
---|
| 509 | TRACE2(TRACE_VNODE, 4, TRCID_DIR_REVALIDATE_EX, |
---|
| 510 | "gpfs_d_revalidate exit: dentry 0x%lX rc %d\n", |
---|
| 511 | dentry, rc); |
---|
| 512 | EXIT(0); |
---|
| 513 | if (rc) |
---|
| 514 | return false; |
---|
| 515 | else |
---|
| 516 | return true; |
---|
| 517 | } |
---|
| 518 | |
---|
| 519 | #ifdef CCL |
---|
| 520 | /* The d_revalidate function checks whether the directory entry |
---|
| 521 | cached in the given dentry struct is still valid. |
---|
| 522 | Any dentry referencing this operation is |
---|
| 523 | a positive dentry that was created for |
---|
| 524 | an inexact caseless file name match for a Samba client. |
---|
| 525 | The d_revalidate returns "true" for subsequent Samba clients |
---|
| 526 | indicating that the positive dcache entry is still valid. |
---|
| 527 | It returns "false" for local or NFS clients indicating |
---|
| 528 | that the dcache entry is no longer valid which forces |
---|
| 529 | a new lookup. */ |
---|
| 530 | int |
---|
| 531 | #if LINUX_KERNEL_VERSION >= 2060000 |
---|
| 532 | gpfs_d_valid_if_Samba(struct dentry *dentry, struct nameidata *ni) |
---|
| 533 | #else |
---|
| 534 | gpfs_d_valid_if_Samba(struct dentry *dentry, int flags) |
---|
| 535 | #endif |
---|
| 536 | { |
---|
| 537 | TRACE4(TRACE_VNODE, 4, TRCID_DIR_VALID_IF_SAMBA, |
---|
| 538 | "gpfs_d_valid_if_Samba: dentry 0x%lX " |
---|
| 539 | "d_inode 0x%lX (name '%s') returns %s\n", |
---|
| 540 | dentry, dentry->d_inode, dentry->d_name.name, |
---|
| 541 | (cxiIsSambaThread() ? "true" : "false")); |
---|
| 542 | return cxiIsSambaThread(); |
---|
| 543 | } |
---|
| 544 | |
---|
| 545 | /* The d_revalidate function checks whether the directory entry |
---|
| 546 | cached in the given dentry struct is still valid. |
---|
| 547 | Any dentry referencing this operation is |
---|
| 548 | a negative dentry that was created for |
---|
| 549 | an exact file name match which failed for a local or NFS client. |
---|
| 550 | The d_revalidate returns "true" for subsequent local or NFS clients |
---|
| 551 | indicating that the negative dcache entry is still valid. |
---|
| 552 | It returns "false" for Samba clients indicating |
---|
| 553 | that the dcache entry is no longer valid which forces |
---|
| 554 | a new lookup. */ |
---|
| 555 | int |
---|
| 556 | #if LINUX_KERNEL_VERSION >= 2060000 |
---|
| 557 | gpfs_d_invalid_if_Samba(struct dentry *dentry, struct nameidata *ni) |
---|
| 558 | #else |
---|
| 559 | gpfs_d_invalid_if_Samba(struct dentry *dentry, int flags) |
---|
| 560 | #endif |
---|
| 561 | { |
---|
| 562 | TRACE4(TRACE_VNODE, 4, TRCID_DIR_INVALID_IF_SAMBA, |
---|
| 563 | "gpfs_d_invalid_if_Samba: dentry 0x%lX " |
---|
| 564 | "d_inode 0x%lX (name '%s') returns %s\n", |
---|
| 565 | dentry, dentry->d_inode, dentry->d_name.name, |
---|
| 566 | (cxiIsSambaThread() ? "false" : "true")); |
---|
| 567 | return !cxiIsSambaThread(); |
---|
| 568 | } |
---|
| 569 | #endif |
---|