| 1 | /*************************************************************************** | 
|---|
| 2 | * | 
|---|
| 3 | * Copyright (C) 2001 International Business Machines | 
|---|
| 4 | * All rights reserved. | 
|---|
| 5 | * | 
|---|
| 6 | * This file is part of the GPFS mmfslinux kernel module. | 
|---|
| 7 | * | 
|---|
| 8 | * Redistribution and use in source and binary forms, with or without | 
|---|
| 9 | * modification, are permitted provided that the following conditions | 
|---|
| 10 | * are met: | 
|---|
| 11 | * | 
|---|
| 12 | *  1. Redistributions of source code must retain the above copyright notice, | 
|---|
| 13 | *     this list of conditions and the following disclaimer. | 
|---|
| 14 | *  2. Redistributions in binary form must reproduce the above copyright | 
|---|
| 15 | *     notice, this list of conditions and the following disclaimer in the | 
|---|
| 16 | *     documentation and/or other materials provided with the distribution. | 
|---|
| 17 | *  3. The name of the author may not be used to endorse or promote products | 
|---|
| 18 | *     derived from this software without specific prior written | 
|---|
| 19 | *     permission. | 
|---|
| 20 | * | 
|---|
| 21 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | 
|---|
| 22 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | 
|---|
| 23 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | 
|---|
| 24 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 
|---|
| 25 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 
|---|
| 26 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | 
|---|
| 27 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | 
|---|
| 28 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | 
|---|
| 29 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | 
|---|
| 30 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
|---|
| 31 | * | 
|---|
| 32 | *************************************************************************** */ | 
|---|
| 33 | /* @(#)94       1.48  src/avs/fs/mmfs/ts/kernext/gpl-linux/dir.c, mmfs, avs_rgpfs24, rgpfs240610b 11/8/05 10:20:56 */ | 
|---|
| 34 |  | 
|---|
| 35 | #define __NO_VERSION__ | 
|---|
| 36 |  | 
|---|
| 37 | #ifndef __KERNEL__ | 
|---|
| 38 | #define __KERNEL__ | 
|---|
| 39 | #endif | 
|---|
| 40 |  | 
|---|
| 41 | #include <Shark-gpl.h> | 
|---|
| 42 |  | 
|---|
| 43 | #include <linux/fs.h> | 
|---|
| 44 | #include <linux/sched.h> | 
|---|
| 45 |  | 
|---|
| 46 | #include <linux2gpfs.h> | 
|---|
| 47 | #include <cxiSystem.h> | 
|---|
| 48 | #include <cxiTypes.h> | 
|---|
| 49 | #include <cxiIOBuffer.h> | 
|---|
| 50 | #include <cxiSharedSeg.h> | 
|---|
| 51 | #include <cxiCred.h> | 
|---|
| 52 | #include <cxi2gpfs.h> | 
|---|
| 53 | #include <Trace.h> | 
|---|
| 54 | #include <verdep.h> | 
|---|
| 55 |  | 
|---|
| 56 |  | 
|---|
| 57 | /* About dcache revalidation: | 
|---|
| 58 |  | 
|---|
| 59 | The Linux directory cache (dcache) is used to cache the result of name | 
|---|
| 60 | lookups.  Linux caches positive as well as negative lookup results in its | 
|---|
| 61 | dcache entries (struct dentry): if the file existed at the time the last | 
|---|
| 62 | lookup was done (positive lookup), dentry->d_inode will point to the struct | 
|---|
| 63 | inode of the file; if the file did not exist (negative lookup), | 
|---|
| 64 | dentry->d_inode will be null. | 
|---|
| 65 |  | 
|---|
| 66 | When a directory is modified on the local node, Linux will update its | 
|---|
| 67 | dcache entries accordingly.  When the directory is modified on another | 
|---|
| 68 | node, however, we need to invalidate local dcache entries: | 
|---|
| 69 |  | 
|---|
| 70 | - A negative dcache entry becomes invalid when a file by the same name is | 
|---|
| 71 | created on another node.  This requires an exclusive byte-range token on | 
|---|
| 72 | the directory block in which the lookup was done that resulted in the | 
|---|
| 73 | dcache entry.  Hence, when we lose a byte-range token on a directory, we | 
|---|
| 74 | invalidate all negative dcache entries for lookups that were done in | 
|---|
| 75 | that directory.  This is done by a call to kxinvalidateOSNode with | 
|---|
| 76 | KXIVO_NEGDCACHE, which will result in a call to | 
|---|
| 77 | cxiInvalidateNegDCacheEntry() implemented here. | 
|---|
| 78 |  | 
|---|
| 79 | - A positive dcache entry becomes invalid when the file it refers to | 
|---|
| 80 | is deleted, moved, or renamed on another node.  All of these operations | 
|---|
| 81 | require an exclusive inode lock.  Hence we invalidate a positive dcache | 
|---|
| 82 | entry when we lose the inode token for the file.  This more selective | 
|---|
| 83 | invalidation of positive dcache entries is more efficient than simply | 
|---|
| 84 | invalidating all dcache entries when we lose a byte-range token on the | 
|---|
| 85 | directory.  The invalidation is done by a call to kxinvalidateOSNode | 
|---|
| 86 | with CXI_IC_DCACHE, which will result in a call to | 
|---|
| 87 | cxiInvalidateDCacheEntry() implemented here. | 
|---|
| 88 |  | 
|---|
| 89 | To invalidate a dcache entry Linux defines a d_revalidate function in the | 
|---|
| 90 | dentry_operations table.  This function is supposed to check whether the | 
|---|
| 91 | dcache entry is still valid and return 'true' or 'false' accordingly. | 
|---|
| 92 | If no d_revalidate function is given in the dentry_operations table, | 
|---|
| 93 | Linux assumes the dentry is valid.  Hence the most efficient way | 
|---|
| 94 | of marking a dentry as valid or invalid is to have the d_op field in | 
|---|
| 95 | the dentry point to one of two different dentry_operations tables: | 
|---|
| 96 | one where the d_revalidate field is NULL (means the dentry is valid), | 
|---|
| 97 | and one where d_revalidate points at a function that always returns false | 
|---|
| 98 | (means the dentry is invalid). */ | 
|---|
| 99 |  | 
|---|
| 100 |  | 
|---|
| 101 | /* This call handles pruning off all unheld dentries pointing at an | 
|---|
| 102 | * inode. Normally pruning is not done by any daemon thread directly | 
|---|
| 103 | * (ie. token revoke) because d_prune_aliases may initiate a string of | 
|---|
| 104 | * callbacks due to iput.  These callbacks may need to communicate back | 
|---|
| 105 | * to the daemon which can be problematic if there is a mailbox shortage. | 
|---|
| 106 | * Hence most dentry invalidation marks the cxiNode as needing a dentry | 
|---|
| 107 | * prune and the GPFS swapd is notified to call cxiPruneDCacheEntry in a | 
|---|
| 108 | * separate thread. | 
|---|
| 109 | * | 
|---|
| 110 | * Caller must be prepared to receive iput() callback into GPFS. | 
|---|
| 111 | * Caller must have a reference on cxiNode_t to ensure it doesn't | 
|---|
| 112 | * go away during processing. | 
|---|
| 113 | */ | 
|---|
| 114 | int | 
|---|
| 115 | cxiPruneDCacheEntry(cxiNode_t *cnP) | 
|---|
| 116 | { | 
|---|
| 117 | struct inode *iP = (struct inode *)cnP->osNodeP; | 
|---|
| 118 | struct list_head *dListP, *dHeadP; | 
|---|
| 119 | struct dentry *dentry; | 
|---|
| 120 | Boolean hasSubdirs = false; | 
|---|
| 121 | int refCount = 0; | 
|---|
| 122 |  | 
|---|
| 123 | ENTER(0); | 
|---|
| 124 | TRACE2(TRACE_VNODE, 4, TRCID_PRUNE_DCACHE, | 
|---|
| 125 | "cxiPruneDCacheEntry: iP 0x%lX inode %d", iP, iP->i_ino); | 
|---|
| 126 |  | 
|---|
| 127 | /* About to prune it so flag is no longer needed */ | 
|---|
| 128 | ClearCtFlag(cnP, pruneDCacheNeeded); | 
|---|
| 129 |  | 
|---|
| 130 | /* This call prunes any unheld dentries pointing at the inode */ | 
|---|
| 131 | d_prune_aliases(iP); | 
|---|
| 132 |  | 
|---|
| 133 | /* Traverse the list of all dentries that still refer to this file. */ | 
|---|
| 134 | dHeadP = &iP->i_dentry; | 
|---|
| 135 | spin_lock(&dcache_lock); | 
|---|
| 136 | for (dListP = dHeadP->next; dListP != dHeadP; dListP = dListP->next) | 
|---|
| 137 | { | 
|---|
| 138 | /* count dentries that still refer to this file */ | 
|---|
| 139 | refCount++; | 
|---|
| 140 |  | 
|---|
| 141 | dentry = list_entry(dListP, struct dentry, d_alias); | 
|---|
| 142 | hasSubdirs = !list_empty(&dentry->d_subdirs); | 
|---|
| 143 |  | 
|---|
| 144 | TRACE5N(TRACE_VNODE, 4, TRCID_PRUNE_DCACHE_ALIAS, | 
|---|
| 145 | "cxiPruneDCacheEntry: ip 0x%lX ino %d alias dentry 0x%lX " | 
|---|
| 146 | "hasSubdirs %d name '%s'", iP, iP->i_ino, dentry, | 
|---|
| 147 | hasSubdirs, dentry->d_name.name); | 
|---|
| 148 |  | 
|---|
| 149 | /* Attempt to prune unused children.  Helps keep stat cache manageable */ | 
|---|
| 150 | if (hasSubdirs) | 
|---|
| 151 | { | 
|---|
| 152 | dget_locked(dentry); | 
|---|
| 153 | spin_unlock(&dcache_lock); | 
|---|
| 154 |  | 
|---|
| 155 | /* This call walks the tree starting at this parent dentry and | 
|---|
| 156 | * will successfully uncache child dentries that aren't held by | 
|---|
| 157 | * user programs and iput their associated inodes (resulting in | 
|---|
| 158 | * many cases of the inode i_count going to 0.  iput() may however | 
|---|
| 159 | * just put these inodes on the unused list if they are still | 
|---|
| 160 | * valid (i_nlink > 0) and linked on i_hash.  Thus in many cases while | 
|---|
| 161 | * the dentries immediately disappear their associated inode don't | 
|---|
| 162 | * have an immediate clear_inode() called on them.  Subsequent | 
|---|
| 163 | * pruning (by kswapd) should shrink the icache for unused inodes | 
|---|
| 164 | * resulting in the gpfs_s_clear_inode callback for these inodes. | 
|---|
| 165 | */ | 
|---|
| 166 | shrink_dcache_parent(dentry); | 
|---|
| 167 | dput(dentry); | 
|---|
| 168 |  | 
|---|
| 169 | /* For directories we don't support hard links so we shouldn't | 
|---|
| 170 | * have multiple dentries that need to be pruned.  Hence | 
|---|
| 171 | * after having dropped the dcache lock we break out of this | 
|---|
| 172 | * for loop. | 
|---|
| 173 | */ | 
|---|
| 174 | break; | 
|---|
| 175 | } | 
|---|
| 176 | } | 
|---|
| 177 | if (!hasSubdirs) | 
|---|
| 178 | spin_unlock(&dcache_lock); | 
|---|
| 179 |  | 
|---|
| 180 | EXIT(0); | 
|---|
| 181 | return refCount; | 
|---|
| 182 | } | 
|---|
| 183 |  | 
|---|
| 184 | /* Mark the dentry as needing a revalidate.  Called after losing | 
|---|
| 185 | * a token protecting the attributes of this dcache entry. | 
|---|
| 186 | */ | 
|---|
| 187 | int | 
|---|
| 188 | cxiInvalidateDCacheEntry(cxiNode_t *cnP) | 
|---|
| 189 | { | 
|---|
| 190 | struct inode *iP = (struct inode *)cnP->osNodeP; | 
|---|
| 191 | struct list_head *dListP, *dHeadP; | 
|---|
| 192 | struct dentry *dentry; | 
|---|
| 193 | int refCount = 0; | 
|---|
| 194 |  | 
|---|
| 195 | /* Traverse the list of all dentries that refer to this file. */ | 
|---|
| 196 | ENTER(0); | 
|---|
| 197 | TRACE2(TRACE_VNODE, 4, TRCID_INVAL_DCACHE, | 
|---|
| 198 | "cxiInvalidateDCacheEntry: ip 0x%lX inode %d", iP, iP->i_ino); | 
|---|
| 199 |  | 
|---|
| 200 | dHeadP = &iP->i_dentry; | 
|---|
| 201 | spin_lock(&dcache_lock); | 
|---|
| 202 | for (dListP = dHeadP->next; dListP != dHeadP; dListP = dListP->next) | 
|---|
| 203 | { | 
|---|
| 204 | refCount++; | 
|---|
| 205 |  | 
|---|
| 206 | /* Mark the entry as needing revalidation by setting the d_op | 
|---|
| 207 | * function table to gpfs_dops_revalidate.  Since this dentry | 
|---|
| 208 | * is staying in the vfs we can't declare it invalid, or a legitimate | 
|---|
| 209 | * stat on it may return ESTALE. | 
|---|
| 210 | * Scenario: node a) mkdir foo; cd foo | 
|---|
| 211 | *           node b) chmod 500 foo | 
|---|
| 212 | *           node a) ls -al | 
|---|
| 213 | * must succeed. | 
|---|
| 214 | */ | 
|---|
| 215 | dentry = list_entry(dListP, struct dentry, d_alias); | 
|---|
| 216 | dentry->d_op = &gpfs_dops_revalidate; | 
|---|
| 217 |  | 
|---|
| 218 | TRACE4N(TRACE_VNODE, 4, TRCID_INVAL_DCACHE_ALIAS, | 
|---|
| 219 | "cxiInvalidateDCacheEntry: ip 0x%lX ino %d " | 
|---|
| 220 | "alias dentry 0x%lX name '%s'", | 
|---|
| 221 | iP, iP->i_ino, dentry, dentry->d_name.name); | 
|---|
| 222 |  | 
|---|
| 223 | if (TestCtFlag(cnP, destroyIfDelInode)) | 
|---|
| 224 | { | 
|---|
| 225 | /* If the file was deleted, marking the dentry invalid is not | 
|---|
| 226 | * sufficient.  If we leave the dentry in the cache marked as | 
|---|
| 227 | * invalid, it will remain in the cache until: | 
|---|
| 228 | * | 
|---|
| 229 | *  a) if it has a zero d_count then the scheduled GPFS swapd | 
|---|
| 230 | *     d_prune_aliases will get rid of it | 
|---|
| 231 | *  b) if it has a nonzero d_count (it's open) then d_prune_aliases | 
|---|
| 232 | *     would not prune it and it would stay in the cache until the | 
|---|
| 233 | *     next lookup finds it and calls d_invalidate, which might not | 
|---|
| 234 | *     ever happen. | 
|---|
| 235 | * | 
|---|
| 236 | * Thus we drop the dentry and the final close or schedule | 
|---|
| 237 | * d_prune_aliases will remove it. | 
|---|
| 238 | */ | 
|---|
| 239 | DENTRY_DROP(dentry); | 
|---|
| 240 | } | 
|---|
| 241 |  | 
|---|
| 242 | /* Dentries for this cxiNode_t should be pruned by GPFS swapd thread | 
|---|
| 243 | * which will be signalled by the caller of this routine. | 
|---|
| 244 | */ | 
|---|
| 245 | SetCtFlag(cnP, pruneDCacheNeeded); | 
|---|
| 246 | } | 
|---|
| 247 | spin_unlock(&dcache_lock); | 
|---|
| 248 |  | 
|---|
| 249 | EXIT(0); | 
|---|
| 250 | return refCount; | 
|---|
| 251 | } | 
|---|
| 252 |  | 
|---|
| 253 | /* The following function is called to remove invalid dcache entries for a | 
|---|
| 254 | file when the file is deleted on this node. | 
|---|
| 255 | Such invalid dcache entries occur when a file is renamed on another node | 
|---|
| 256 | before it is deleted here.  The rename revokes the inode token, which marks | 
|---|
| 257 | the dcache entry invalid, but does not remove it from the cache on this | 
|---|
| 258 | node.  When the file is deleted, the delete operation on this node will | 
|---|
| 259 | look up the file under its new name and turn the (new) dcache entry into a | 
|---|
| 260 | negative dcache entry, but since the file was renamed, it will not find or | 
|---|
| 261 | process the old, invalid dcache entry (the one referring to the old file | 
|---|
| 262 | name).  This function is called during delete (when the link count goes to | 
|---|
| 263 | zero) to remove old, invalid dcache entries, so the file can be destroyed. | 
|---|
| 264 | The function is similar to cxiInvalidateDCacheEntry, with the following | 
|---|
| 265 | differences: (1) it is only called on files that are being deleted (link | 
|---|
| 266 | count zero and destroyIfDelInode flag already set), (2) it does not mark | 
|---|
| 267 | any dcache entries as invalid; instead, it (3) only drops dcache entries | 
|---|
| 268 | that are already marked as invalid.  In particular, we do not want to | 
|---|
| 269 | invalidate the dcache entry referring to the current name being unlinked, | 
|---|
| 270 | because unlink will turn this into a valid, negative dcache entry. */ | 
|---|
| 271 | void | 
|---|
| 272 | cxiDropInvalidDCacheEntry(cxiNode_t *cnP) | 
|---|
| 273 | { | 
|---|
| 274 | struct inode *iP = (struct inode *)cnP->osNodeP; | 
|---|
| 275 | struct list_head *dListP, *dHeadP; | 
|---|
| 276 | struct dentry *dentry; | 
|---|
| 277 | int holdCount; | 
|---|
| 278 |  | 
|---|
| 279 | ENTER(0); | 
|---|
| 280 | TRACE2(TRACE_VNODE, 4, TRCID_DROP_INVAL_DCACHE, | 
|---|
| 281 | "cxiDropInvalidDCacheEntry: iP 0x%lX i_ino %d", | 
|---|
| 282 | iP, iP->i_ino); | 
|---|
| 283 |  | 
|---|
| 284 | DBGASSERT(TestCtFlag(cnP, destroyIfDelInode)); | 
|---|
| 285 |  | 
|---|
| 286 | /* Traverse the list of all dentries that still refer to this file. */ | 
|---|
| 287 | dHeadP = &iP->i_dentry; | 
|---|
| 288 | spin_lock(&dcache_lock); | 
|---|
| 289 | for (dListP = dHeadP->next; dListP != dHeadP; dListP = dListP->next) | 
|---|
| 290 | { | 
|---|
| 291 | /* Check whether this dentry mas been marked invalid */ | 
|---|
| 292 | dentry = list_entry(dListP, struct dentry, d_alias); | 
|---|
| 293 | if (dentry->d_op == &gpfs_dops_invalid || | 
|---|
| 294 | dentry->d_op == &gpfs_dops_revalidate) | 
|---|
| 295 | { | 
|---|
| 296 | TRACE4N(TRACE_VNODE, 4, TRCID_DROP_INVAL_DCACHE_ALIAS, | 
|---|
| 297 | "cxiDropInvalidDCacheEntry: ip 0x%lX ino %d " | 
|---|
| 298 | "removing dentry 0x%lX name '%s'\n", | 
|---|
| 299 | iP, iP->i_ino, dentry, dentry->d_name.name); | 
|---|
| 300 |  | 
|---|
| 301 | /* Drop the dcache entry.  See details in cxiInvalidateDCacheEntry */ | 
|---|
| 302 | DENTRY_DROP(dentry); | 
|---|
| 303 |  | 
|---|
| 304 | /* Dentries for this cxiNode_t should be pruned */ | 
|---|
| 305 | SetCtFlag(cnP, pruneDCacheNeeded); | 
|---|
| 306 | } | 
|---|
| 307 | } | 
|---|
| 308 | spin_unlock(&dcache_lock); | 
|---|
| 309 | EXIT(0); | 
|---|
| 310 | } | 
|---|
| 311 |  | 
|---|
| 312 | /* The following function is called to invalidate negative dcache entries for | 
|---|
| 313 | all files in a directory when we lose the BR token for the directory. */ | 
|---|
| 314 | int | 
|---|
| 315 | cxiInvalidateNegDCacheEntry(cxiNode_t *cnP) | 
|---|
| 316 | { | 
|---|
| 317 | struct inode *iP = (struct inode *)cnP->osNodeP; | 
|---|
| 318 | struct list_head *dListP, *dHeadP; | 
|---|
| 319 | struct list_head *cListP, *cHeadP; | 
|---|
| 320 | struct dentry *dentry, *child; | 
|---|
| 321 | int refCount = 0; | 
|---|
| 322 |  | 
|---|
| 323 | ENTER(0); | 
|---|
| 324 | TRACE2(TRACE_VNODE, 4, TRCID_INVAL_NEG_DCACHE, | 
|---|
| 325 | "cxiInvalidateNegDCacheEntry: iP 0x%lX inode %d", | 
|---|
| 326 | iP, iP->i_ino); | 
|---|
| 327 |  | 
|---|
| 328 | /* Traverse the list of all dentries that refer to this directory. | 
|---|
| 329 | Note: since we don't support hard links to directories, we expect | 
|---|
| 330 | there to be exactly one dentry on this list. */ | 
|---|
| 331 | dHeadP = &iP->i_dentry; | 
|---|
| 332 | spin_lock(&dcache_lock); | 
|---|
| 333 | for (dListP = dHeadP->next; dListP != dHeadP; dListP = dListP->next) | 
|---|
| 334 | { | 
|---|
| 335 | refCount++; | 
|---|
| 336 |  | 
|---|
| 337 | /* traverse the list of all children of this dentry */ | 
|---|
| 338 | dentry = list_entry(dListP, struct dentry, d_alias); | 
|---|
| 339 | cHeadP = &dentry->d_subdirs; | 
|---|
| 340 | for (cListP = cHeadP->next; cListP != cHeadP; cListP = cListP->next) | 
|---|
| 341 | { | 
|---|
| 342 | /* If this child is a negative dentry (d_inode pointer is NULL), | 
|---|
| 343 | mark the entry invalid by setting the dop function table to | 
|---|
| 344 | gpfs_dops_invalid, which contains a d_revalidate function that | 
|---|
| 345 | always returns false.  Also handle dcache entries that are | 
|---|
| 346 | about to be deleted (unlink operation pending but not yet complete). | 
|---|
| 347 | These entries still have a non-null d_inode pointer, but are | 
|---|
| 348 | marked as "delete pending" by having a different d_op table. | 
|---|
| 349 | We should not mark the latter as invalid, because we don't know | 
|---|
| 350 | yet whether the delete operation is going to succeed, so we mark | 
|---|
| 351 | those dentries as "needing revalidation".  (see also comments | 
|---|
| 352 | in gpfs_i_unlink and gpfs_i_rmdir). */ | 
|---|
| 353 | child = list_entry(cListP, struct dentry, d_child); | 
|---|
| 354 | if (!child->d_inode || child->d_op == &gpfs_dops_ddeletepending) | 
|---|
| 355 | { | 
|---|
| 356 | child->d_op = !child->d_inode ? | 
|---|
| 357 | &gpfs_dops_invalid : &gpfs_dops_revalidate; | 
|---|
| 358 |  | 
|---|
| 359 | TRACE5N(TRACE_VNODE, 4, TRCID_INVAL_NEG_DUNCACHE, | 
|---|
| 360 | "cxiInvalidateNegDCacheEntry: ip 0x%lX ino %d " | 
|---|
| 361 | "%s dentry 0x%lX name '%s'", | 
|---|
| 362 | iP, iP->i_ino, | 
|---|
| 363 | !child->d_inode ? "inval" : "reval", | 
|---|
| 364 | child, child->d_name.name); | 
|---|
| 365 | } | 
|---|
| 366 | } | 
|---|
| 367 | } | 
|---|
| 368 | spin_unlock(&dcache_lock); | 
|---|
| 369 |  | 
|---|
| 370 | EXIT(0); | 
|---|
| 371 | return refCount; | 
|---|
| 372 | } | 
|---|
| 373 |  | 
|---|
| 374 | /* dentry_operations */ | 
|---|
| 375 |  | 
|---|
| 376 | /* The d_revalidate function is expected to check whether the directory entry | 
|---|
| 377 | * cached in the given dentry struct is still valid. | 
|---|
| 378 | */ | 
|---|
| 379 | int | 
|---|
| 380 | #if LINUX_KERNEL_VERSION >= 2060000 | 
|---|
| 381 | gpfs_d_invalid(struct dentry *dentry, struct nameidata *ni) | 
|---|
| 382 | #else | 
|---|
| 383 | gpfs_d_invalid(struct dentry *dentry, int flags) | 
|---|
| 384 | #endif | 
|---|
| 385 | { | 
|---|
| 386 | TRACE3(TRACE_VNODE, 4, TRCID_DIR_001, | 
|---|
| 387 | "gpfs_d_invalid: dentry 0x%lX d_inode 0x%lX name '%s' is invalid", | 
|---|
| 388 | dentry, dentry->d_inode, dentry->d_name.name); | 
|---|
| 389 | return false; | 
|---|
| 390 | } | 
|---|
| 391 |  | 
|---|
| 392 | int | 
|---|
| 393 | #if LINUX_KERNEL_VERSION >= 2060000 | 
|---|
| 394 | gpfs_d_revalidate(struct dentry *dentry, struct nameidata *ni) | 
|---|
| 395 | #else | 
|---|
| 396 | gpfs_d_revalidate(struct dentry *dentry, int flags) | 
|---|
| 397 | #endif | 
|---|
| 398 | { | 
|---|
| 399 | int rc; | 
|---|
| 400 | cxiNode_t *dcnP; | 
|---|
| 401 | cxiNode_t *cnP = NULL; | 
|---|
| 402 | struct inode *diP; | 
|---|
| 403 | struct inode *newInodeP; | 
|---|
| 404 | struct gpfsVfsData_t *privVfsP; | 
|---|
| 405 | cxiIno_t iNum = (cxiIno_t)-1; | 
|---|
| 406 | ext_cred_t eCred; | 
|---|
| 407 | struct dentry *retP; | 
|---|
| 408 |  | 
|---|
| 409 | ENTER(0); | 
|---|
| 410 | TRACE6(TRACE_VNODE, 4, TRCID_DIR_REVALIDATE, | 
|---|
| 411 | "gpfs_d_revalidate enter: dentry 0x%lX " | 
|---|
| 412 | "d_inode 0x%lX inum %d parent 0x%lX cwd 0x%lX d_name '%s'", | 
|---|
| 413 | dentry, dentry->d_inode, | 
|---|
| 414 | dentry->d_inode ? dentry->d_inode->i_ino : -1, | 
|---|
| 415 | dentry->d_parent, current->fs->pwd, dentry->d_name.name); | 
|---|
| 416 |  | 
|---|
| 417 | rc = gpfs_i_revalidate(dentry); | 
|---|
| 418 |  | 
|---|
| 419 | /* We're going to need to revalidate this according to its name. | 
|---|
| 420 | * The scenario that caused us problems is: | 
|---|
| 421 | * | 
|---|
| 422 | *   Node a) mkdir dir1; touch dir1/file1 | 
|---|
| 423 | *   Node b) mv dir1 dir2 | 
|---|
| 424 | *   Node a) ls -al dir1 | 
|---|
| 425 | * | 
|---|
| 426 | * This code used to just revalidate the inode (gpfs_i_revalidate) | 
|---|
| 427 | * which would succeed since the dir1 inode is indeed still valid. | 
|---|
| 428 | * However its name has now changed to dir2 and thus this lookup | 
|---|
| 429 | * with its last known name is performed.  We don't perform this | 
|---|
| 430 | * lookup for the root inode.  We didn't have to do this before | 
|---|
| 431 | * RH 2.4.18-5 (unusual fix for NFS is in that kernel) but now | 
|---|
| 432 | * we have to go thru these machinations.  Most of this is a | 
|---|
| 433 | * tradeoff and doesn't give exactly correct semantics. | 
|---|
| 434 | * | 
|---|
| 435 | * For instance normally on a local node directory rename the dentry | 
|---|
| 436 | * gets moved over to its new position via d_move.  However in our | 
|---|
| 437 | * case we don't know what the new name is since we've just lost | 
|---|
| 438 | * the token and have no other info.  If a process is sitting in this | 
|---|
| 439 | * renamed directory structure then it has to remain valid for that | 
|---|
| 440 | * process but none other.  We unhash the directory so no other | 
|---|
| 441 | * process can step into that subtree but continue to say its valid | 
|---|
| 442 | * if (d_count > 1).  At that point the only process calling d_revalidate | 
|---|
| 443 | * would be a process with it's current working directory in that | 
|---|
| 444 | * subtree.  However this breaks down if the process needs to back | 
|---|
| 445 | * up into a parent directory, since d_revalidate starts from outside | 
|---|
| 446 | * the renamed subtree and can't proceed into the unhashed directory. | 
|---|
| 447 | * Thus you get an odd | 
|---|
| 448 | *   getcwd: cannot access parent directories: No such file or directory | 
|---|
| 449 | * ancillary message but you can cd backwards correctly. | 
|---|
| 450 | * | 
|---|
| 451 | * Another idea attempted was looking at the process' cwd in the | 
|---|
| 452 | * task struct and answering whether the dentry was valid on a | 
|---|
| 453 | * per process basis.  This gave odd semantics because a process | 
|---|
| 454 | * could list the parent directory and not see the renamed child | 
|---|
| 455 | * but could still cd into it (because it was still hashed).  That | 
|---|
| 456 | * breaks down completely if another node makes a directory of the | 
|---|
| 457 | * old name in the parent. | 
|---|
| 458 | * | 
|---|
| 459 | * So if we can't use d_move...which it doesn't appear possible to | 
|---|
| 460 | * do, at a minimum you have to unhash the directory if it no | 
|---|
| 461 | * longer has the correct name or inode. | 
|---|
| 462 | * | 
|---|
| 463 | * Note that once a process steps out of the renamed dentry then | 
|---|
| 464 | * the final dput will kill the dentry. | 
|---|
| 465 | */ | 
|---|
| 466 | if (rc == 0 && dentry->d_inode->i_ino != INODENUM_ROOTDIR_FILE) | 
|---|
| 467 | { | 
|---|
| 468 | setCred(&eCred); | 
|---|
| 469 |  | 
|---|
| 470 | privVfsP = VP_TO_PVP(dentry->d_inode); | 
|---|
| 471 | DBGASSERT(privVfsP != NULL); | 
|---|
| 472 | LOGASSERT(dentry->d_parent != NULL); | 
|---|
| 473 |  | 
|---|
| 474 | diP = dentry->d_parent->d_inode; | 
|---|
| 475 | dcnP = VP_TO_CNP(diP); | 
|---|
| 476 |  | 
|---|
| 477 | rc = gpfs_ops.gpfsLookup(privVfsP, (void *)diP, dcnP, | 
|---|
| 478 | NULL, (char *)dentry->d_name.name, | 
|---|
| 479 | (void **)&newInodeP, &cnP, &iNum, NULL, | 
|---|
| 480 | NULL, &eCred, (void **)&retP); | 
|---|
| 481 | if (rc == 0) | 
|---|
| 482 | { | 
|---|
| 483 | iput(newInodeP); | 
|---|
| 484 |  | 
|---|
| 485 | if (iNum != dentry->d_inode->i_ino) | 
|---|
| 486 | rc = ESTALE; | 
|---|
| 487 | } | 
|---|
| 488 |  | 
|---|
| 489 | /* The name is either no longer valid or has been renamed | 
|---|
| 490 | * and recreated with a different inode.  We need to drop | 
|---|
| 491 | * the dentry from the hash list so another process can't | 
|---|
| 492 | * proceed into that tree. | 
|---|
| 493 | */ | 
|---|
| 494 | if (rc) | 
|---|
| 495 | { | 
|---|
| 496 | DENTRY_D_DROP(dentry); | 
|---|
| 497 |  | 
|---|
| 498 | d_prune_aliases(dentry->d_inode); | 
|---|
| 499 |  | 
|---|
| 500 | /* If the dentry still has processes sitting underneath | 
|---|
| 501 | * it we'll still claim its valid. | 
|---|
| 502 | */ | 
|---|
| 503 | if (atomic_read(&dentry->d_count) > 1) | 
|---|
| 504 | rc = 0; | 
|---|
| 505 | } | 
|---|
| 506 | } | 
|---|
| 507 |  | 
|---|
| 508 | xerror: | 
|---|
| 509 | TRACE2(TRACE_VNODE, 4, TRCID_DIR_REVALIDATE_EX, | 
|---|
| 510 | "gpfs_d_revalidate exit: dentry 0x%lX rc %d\n", | 
|---|
| 511 | dentry, rc); | 
|---|
| 512 | EXIT(0); | 
|---|
| 513 | if (rc) | 
|---|
| 514 | return false; | 
|---|
| 515 | else | 
|---|
| 516 | return true; | 
|---|
| 517 | } | 
|---|
| 518 |  | 
|---|
| 519 | #ifdef CCL | 
|---|
| 520 | /* The d_revalidate function checks whether the directory entry | 
|---|
| 521 | cached in the given dentry struct is still valid. | 
|---|
| 522 | Any dentry referencing this operation is | 
|---|
| 523 | a positive dentry that was created for | 
|---|
| 524 | an inexact caseless file name match for a Samba client. | 
|---|
| 525 | The d_revalidate returns "true" for subsequent Samba clients | 
|---|
| 526 | indicating that the positive dcache entry is still valid. | 
|---|
| 527 | It returns "false" for local or NFS clients indicating | 
|---|
| 528 | that the dcache entry is no longer valid which forces | 
|---|
| 529 | a new lookup. */ | 
|---|
| 530 | int | 
|---|
| 531 | #if LINUX_KERNEL_VERSION >= 2060000 | 
|---|
| 532 | gpfs_d_valid_if_Samba(struct dentry *dentry, struct nameidata *ni) | 
|---|
| 533 | #else | 
|---|
| 534 | gpfs_d_valid_if_Samba(struct dentry *dentry, int flags) | 
|---|
| 535 | #endif | 
|---|
| 536 | { | 
|---|
| 537 | TRACE4(TRACE_VNODE, 4, TRCID_DIR_VALID_IF_SAMBA, | 
|---|
| 538 | "gpfs_d_valid_if_Samba: dentry 0x%lX " | 
|---|
| 539 | "d_inode 0x%lX (name '%s') returns %s\n", | 
|---|
| 540 | dentry, dentry->d_inode, dentry->d_name.name, | 
|---|
| 541 | (cxiIsSambaThread() ? "true" : "false")); | 
|---|
| 542 | return cxiIsSambaThread(); | 
|---|
| 543 | } | 
|---|
| 544 |  | 
|---|
| 545 | /* The d_revalidate function checks whether the directory entry | 
|---|
| 546 | cached in the given dentry struct is still valid. | 
|---|
| 547 | Any dentry referencing this operation is | 
|---|
| 548 | a negative dentry that was created for | 
|---|
| 549 | an exact file name match which failed for a local or NFS client. | 
|---|
| 550 | The d_revalidate returns "true" for subsequent local or NFS clients | 
|---|
| 551 | indicating that the negative dcache entry is still valid. | 
|---|
| 552 | It returns "false" for Samba clients indicating | 
|---|
| 553 | that the dcache entry is no longer valid which forces | 
|---|
| 554 | a new lookup. */ | 
|---|
| 555 | int | 
|---|
| 556 | #if LINUX_KERNEL_VERSION >= 2060000 | 
|---|
| 557 | gpfs_d_invalid_if_Samba(struct dentry *dentry, struct nameidata *ni) | 
|---|
| 558 | #else | 
|---|
| 559 | gpfs_d_invalid_if_Samba(struct dentry *dentry, int flags) | 
|---|
| 560 | #endif | 
|---|
| 561 | { | 
|---|
| 562 | TRACE4(TRACE_VNODE, 4, TRCID_DIR_INVALID_IF_SAMBA, | 
|---|
| 563 | "gpfs_d_invalid_if_Samba: dentry 0x%lX " | 
|---|
| 564 | "d_inode 0x%lX (name '%s') returns %s\n", | 
|---|
| 565 | dentry, dentry->d_inode, dentry->d_name.name, | 
|---|
| 566 | (cxiIsSambaThread() ? "false" : "true")); | 
|---|
| 567 | return !cxiIsSambaThread(); | 
|---|
| 568 | } | 
|---|
| 569 | #endif | 
|---|