FreeBSD kernel kern code
vfs_cache.c
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  * The Regents of the University of California. All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Poul-Henning Kamp of the FreeBSD Project.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in the
15  * documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$BSDSUniX$");
37 
38 #include "opt_kdtrace.h"
39 #include "opt_ktrace.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/filedesc.h>
44 #include <sys/fnv_hash.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/fcntl.h>
49 #include <sys/mount.h>
50 #include <sys/namei.h>
51 #include <sys/proc.h>
52 #include <sys/rwlock.h>
53 #include <sys/sdt.h>
54 #include <sys/syscallsubr.h>
55 #include <sys/sysctl.h>
56 #include <sys/sysproto.h>
57 #include <sys/vnode.h>
58 #ifdef KTRACE
59 #include <sys/ktrace.h>
60 #endif
61 
62 #include <vm/uma.h>
63 
64 SDT_PROVIDER_DECLARE(vfs);
65 SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *", "char *",
66  "struct vnode *");
67 SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, "struct vnode *",
68  "char *");
69 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, "struct vnode *");
70 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, "struct vnode *",
71  "char *", "struct vnode *");
72 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, "struct vnode *");
73 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, "int",
74  "struct vnode *", "char *");
75 SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, "struct vnode *", "char *",
76  "struct vnode *");
77 SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit__negative,
78  "struct vnode *", "char *");
79 SDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, "struct vnode *",
80  "char *");
81 SDT_PROBE_DEFINE1(vfs, namecache, purge, done, "struct vnode *");
82 SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, "struct vnode *");
83 SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *");
84 SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *",
85  "struct vnode *");
86 SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *",
87  "char *");
88 
89 /*
90  * This structure describes the elements in the cache of recent
91  * names looked up by namei.
92  */
93 
94 struct namecache {
95  LIST_ENTRY(namecache) nc_hash; /* hash chain */
96  LIST_ENTRY(namecache) nc_src; /* source vnode list */
97  TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */
98  struct vnode *nc_dvp; /* vnode of parent of name */
99  struct vnode *nc_vp; /* vnode the name refers to */
100  u_char nc_flag; /* flag bits */
101  u_char nc_nlen; /* length of name */
102  char nc_name[0]; /* segment name + nul */
103 };
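/*
 * Note that nc_name is allocated inline: the UMA zones created in
 * nchinit() below size each entry as the structure plus room for the
 * name and a terminating nul, so an entry and its string come from a
 * single allocation.
 */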
104 
105 /*
106  * struct namecache_ts repeats struct namecache layout up to the
107  * nc_nlen member.
108  * struct namecache_ts is used in place of struct namecache when time(s) need
109  * to be stored. The nc_dotdottime field is used when a cache entry is mapping
110  * both a non-dotdot directory name and dotdot for the directory's
111  * parent.
112  */
113 struct namecache_ts {
114  LIST_ENTRY(namecache) nc_hash; /* hash chain */
115  LIST_ENTRY(namecache) nc_src; /* source vnode list */
116  TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */
117  struct vnode *nc_dvp; /* vnode of parent of name */
118  struct vnode *nc_vp; /* vnode the name refers to */
119  u_char nc_flag; /* flag bits */
120  u_char nc_nlen; /* length of name */
121  struct timespec nc_time; /* timespec provided by fs */
122  struct timespec nc_dotdottime; /* dotdot timespec provided by fs */
123  int nc_ticks; /* ticks value when entry was added */
124  char nc_name[0]; /* segment name + nul */
125 };
126 
127 /*
128  * Flags in namecache.nc_flag
129  */
130 #define NCF_WHITE 0x01
131 #define NCF_ISDOTDOT 0x02
132 #define NCF_TS 0x04
133 #define NCF_DTS 0x08
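/*
 * A minimal sketch of how the two layouts are told apart: an entry is
 * only treated as a struct namecache_ts when NCF_TS is set, as in
 * nc_get_name() and cache_out_ts() below:
 *
 *	if ((ncp->nc_flag & NCF_TS) != 0)
 *		*tsp = ((struct namecache_ts *)ncp)->nc_time;
 */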
134 
135 /*
136  * Name caching works as follows:
137  *
138  * Names found by directory scans are retained in a cache
139  * for future reference. It is managed LRU, so frequently
140  * used names will hang around. Cache is indexed by hash value
141  * obtained from (vp, name) where vp refers to the directory
142  * containing name.
143  *
144  * If it is a "negative" entry (i.e., for a name that is known NOT to
145  * exist), the vnode pointer will be NULL.
146  *
147  * Upon reaching the last segment of a path, if the reference
148  * is for DELETE, or NOCACHE is set (rewrite), and the
149  * name is located in the cache, it will be dropped.
150  */
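/*
 * As a concrete sketch of the indexing described above, the lookup and
 * enter paths below both derive the hash chain from the name and the
 * address of the directory vnode:
 *
 *	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
 *	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
 *	ncpp = NCHHASH(hash);
 */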
151 
152 /*
153  * Structures associated with name caching.
154  */
155 #define NCHHASH(hash) \
156  (&nchashtbl[(hash) & nchash])
157 static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */
158 static TAILQ_HEAD(, namecache) ncneg; /* Negative entry LRU list */
159 static u_long nchash; /* size of hash table */
160 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0,
161  "Size of namecache hash table");
162 static u_long ncnegfactor = 16; /* ratio of negative entries */
163 SYSCTL_ULONG(_vfs, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0,
164  "Ratio of negative namecache entries");
165 static u_long numneg; /* number of negative entries allocated */
166 SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0,
167  "Number of negative entries in namecache");
168 static u_long numcache; /* number of cache entries allocated */
169 SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0,
170  "Number of namecache entries");
171 static u_long numcachehv; /* number of cache entries with vnodes held */
172 SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0,
173  "Number of namecache entries with vnodes held");
174 static u_int ncsizefactor = 2;
175 SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0,
176  "Size factor for namecache");
177 
178 struct nchstats nchstats; /* cache effectiveness statistics */
179 
180 static struct rwlock cache_lock;
181 RW_SYSINIT(vfscache, &cache_lock, "Name Cache");
182 
183 #define CACHE_UPGRADE_LOCK() rw_try_upgrade(&cache_lock)
184 #define CACHE_RLOCK() rw_rlock(&cache_lock)
185 #define CACHE_RUNLOCK() rw_runlock(&cache_lock)
186 #define CACHE_WLOCK() rw_wlock(&cache_lock)
187 #define CACHE_WUNLOCK() rw_wunlock(&cache_lock)
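/*
 * Lookups run under the read lock; paths that must modify the cache
 * (zapping stale entries, repositioning negative hits) first attempt
 * CACHE_UPGRADE_LOCK() and, failing that, drop the read lock, take the
 * write lock and retry (counted by numupgrades below).
 */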
188 
189 /*
190  * UMA zones for the VFS cache.
191  *
192  * The small cache is used for entries with short names, which are the
193  * most common. The large cache is used for entries which are too big to
194  * fit in the small cache.
195  */
196 static uma_zone_t cache_zone_small;
197 static uma_zone_t cache_zone_small_ts;
198 static uma_zone_t cache_zone_large;
199 static uma_zone_t cache_zone_large_ts;
200 
201 #define CACHE_PATH_CUTOFF 35
202 
203 static struct namecache *
204 cache_alloc(int len, int ts)
205 {
206 
207  if (len > CACHE_PATH_CUTOFF) {
208  if (ts)
209  return (uma_zalloc(cache_zone_large_ts, M_WAITOK));
210  else
211  return (uma_zalloc(cache_zone_large, M_WAITOK));
212  }
213  if (ts)
214  return (uma_zalloc(cache_zone_small_ts, M_WAITOK));
215  else
216  return (uma_zalloc(cache_zone_small, M_WAITOK));
217 }
218 
219 static void
220 cache_free(struct namecache *ncp)
221 {
222  int ts;
223 
224  if (ncp == NULL)
225  return;
226  ts = ncp->nc_flag & NCF_TS;
227  if (ncp->nc_nlen <= CACHE_PATH_CUTOFF) {
228  if (ts)
229  uma_zfree(cache_zone_small_ts, ncp);
230  else
231  uma_zfree(cache_zone_small, ncp);
232  } else if (ts)
233  uma_zfree(cache_zone_large_ts, ncp);
234  else
235  uma_zfree(cache_zone_large, ncp);
236 }
237 
238 static char *
239 nc_get_name(struct namecache *ncp)
240 {
241  struct namecache_ts *ncp_ts;
242 
243  if ((ncp->nc_flag & NCF_TS) == 0)
244  return (ncp->nc_name);
245  ncp_ts = (struct namecache_ts *)ncp;
246  return (ncp_ts->nc_name);
247 }
248 
249 static void
250 cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp)
251 {
252 
253  KASSERT((ncp->nc_flag & NCF_TS) != 0 ||
254  (tsp == NULL && ticksp == NULL),
255  ("No NCF_TS"));
256 
257  if (tsp != NULL)
258  *tsp = ((struct namecache_ts *)ncp)->nc_time;
259  if (ticksp != NULL)
260  *ticksp = ((struct namecache_ts *)ncp)->nc_ticks;
261 }
262 
263 static int doingcache = 1; /* 1 => enable the cache */
264 SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0,
265  "VFS namecache enabled");
266 
267 /* Export size information to userland */
268 SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, SYSCTL_NULL_INT_PTR,
269  sizeof(struct namecache), "sizeof(struct namecache)");
270 
271 /*
272  * The new name cache statistics
273  */
274 static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0,
275  "Name cache statistics");
276 #define STATNODE(mode, name, var, descr) \
277  SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, descr);
278 STATNODE(CTLFLAG_RD, numneg, &numneg, "Number of negative cache entries");
279 STATNODE(CTLFLAG_RD, numcache, &numcache, "Number of cache entries");
280 static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls,
281  "Number of cache lookups");
282 static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits,
283  "Number of '.' hits");
284 static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits,
285  "Number of '..' hits");
286 static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks,
287  "Number of checks in lookup");
288 static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss,
289  "Number of cache misses");
290 static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap,
291  "Number of cache misses we do not want to cache");
292 static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps,
293  "Number of cache hits (positive) we do not want to cache");
294 static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits,
295  "Number of cache hits (positive)");
296 static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps,
297  "Number of cache hits (negative) we do not want to cache");
298 static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits,
299  "Number of cache hits (negative)");
300 static u_long numupgrades; STATNODE(CTLFLAG_RD, numupgrades, &numupgrades,
301  "Number of updates of the cache after lookup (write lock + retry)");
302 
303 SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD | CTLFLAG_MPSAFE,
304  &nchstats, sizeof(nchstats), "LU",
305  "VFS cache effectiveness statistics");
306 
307 
308 
309 static void cache_zap(struct namecache *ncp);
310 static int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
311  u_int *buflen);
312 static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
313  char *buf, char **retbuf, u_int buflen);
314 
315 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
316 
317 #ifdef DIAGNOSTIC
318 /*
319  * Grab an atomic snapshot of the name cache hash chain lengths
320  */
321 static SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL,
322  "hash table stats");
323 
324 static int
325 sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
326 {
327  int error;
328  struct nchashhead *ncpp;
329  struct namecache *ncp;
330  int n_nchash;
331  int count;
332 
333  n_nchash = nchash + 1; /* nchash is max index, not count */
334  if (!req->oldptr)
335  return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
336 
337  /* Scan hash tables for applicable entries */
338  for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
339  CACHE_RLOCK();
340  count = 0;
341  LIST_FOREACH(ncp, ncpp, nc_hash) {
342  count++;
343  }
344  CACHE_RUNLOCK();
345  error = SYSCTL_OUT(req, &count, sizeof(count));
346  if (error)
347  return (error);
348  }
349  return (0);
350 }
351 SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD|
352  CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int",
353  "nchash chain lengths");
354 
355 static int
356 sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
357 {
358  int error;
359  struct nchashhead *ncpp;
360  struct namecache *ncp;
361  int n_nchash;
362  int count, maxlength, used, pct;
363 
364  if (!req->oldptr)
365  return SYSCTL_OUT(req, 0, 4 * sizeof(int));
366 
367  n_nchash = nchash + 1; /* nchash is max index, not count */
368  used = 0;
369  maxlength = 0;
370 
371  /* Scan hash tables for applicable entries */
372  for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
373  count = 0;
374  CACHE_RLOCK();
375  LIST_FOREACH(ncp, ncpp, nc_hash) {
376  count++;
377  }
378  CACHE_RUNLOCK();
379  if (count)
380  used++;
381  if (maxlength < count)
382  maxlength = count;
383  }
384  n_nchash = nchash + 1;
385  pct = (used * 100 * 100) / n_nchash;
386  error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
387  if (error)
388  return (error);
389  error = SYSCTL_OUT(req, &used, sizeof(used));
390  if (error)
391  return (error);
392  error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
393  if (error)
394  return (error);
395  error = SYSCTL_OUT(req, &pct, sizeof(pct));
396  if (error)
397  return (error);
398  return (0);
399 }
400 SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD|
401  CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I",
402  "nchash chain lengths");
403 #endif
404 
405 /*
406  * cache_zap():
407  *
408  * Removes a namecache entry from cache, whether it contains an actual
409  * pointer to a vnode or if it is just a negative cache entry.
410  */
411 static void
412 cache_zap(ncp)
413  struct namecache *ncp;
414 {
415  struct vnode *vp;
416 
417  rw_assert(&cache_lock, RA_WLOCKED);
418  CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp);
419 #ifdef KDTRACE_HOOKS
420  if (ncp->nc_vp != NULL) {
421  SDT_PROBE3(vfs, namecache, zap, done, ncp->nc_dvp,
422  nc_get_name(ncp), ncp->nc_vp);
423  } else {
424  SDT_PROBE2(vfs, namecache, zap_negative, done, ncp->nc_dvp,
425  nc_get_name(ncp));
426  }
427 #endif
428  vp = NULL;
429  LIST_REMOVE(ncp, nc_hash);
430  if (ncp->nc_flag & NCF_ISDOTDOT) {
431  if (ncp == ncp->nc_dvp->v_cache_dd)
432  ncp->nc_dvp->v_cache_dd = NULL;
433  } else {
434  LIST_REMOVE(ncp, nc_src);
435  if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
436  vp = ncp->nc_dvp;
437  numcachehv--;
438  }
439  }
440  if (ncp->nc_vp) {
441  TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
442  if (ncp == ncp->nc_vp->v_cache_dd)
443  ncp->nc_vp->v_cache_dd = NULL;
444  } else {
445  TAILQ_REMOVE(&ncneg, ncp, nc_dst);
446  numneg--;
447  }
448  numcache--;
449  cache_free(ncp);
450  if (vp)
451  vdrop(vp);
452 }
453 
454 /*
455  * Lookup an entry in the cache
456  *
457  * Lookup is called with dvp pointing to the directory to search,
458  * cnp pointing to the name of the entry being sought. If the lookup
459  * succeeds, the vnode is returned in *vpp, and a status of -1 is
460  * returned. If the lookup determines that the name does not exist
461  * (negative caching), a status of ENOENT is returned. If the lookup
462  * fails, a status of zero is returned. If the directory vnode is
463  * recycled out from under us due to a forced unmount, a status of
464  * ENOENT is returned.
465  *
466  * vpp is locked and ref'd on return. If we're looking up DOTDOT, dvp is
467  * unlocked. If we're looking up . an extra ref is taken, but the lock is
468  * not recursively acquired.
469  */
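/*
 * A typical consumer (cf. vfs_cache_lookup() below) maps these statuses
 * back onto the VOP_LOOKUP protocol roughly as follows:
 *
 *	error = cache_lookup(dvp, vpp, cnp);
 *	if (error == 0)
 *		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
 *	if (error == -1)
 *		return (0);
 *	return (error);
 */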
470 
471 int
472 cache_lookup_times(dvp, vpp, cnp, tsp, ticksp)
473  struct vnode *dvp;
474  struct vnode **vpp;
475  struct componentname *cnp;
476  struct timespec *tsp;
477  int *ticksp;
478 {
479  struct namecache *ncp;
480  uint32_t hash;
481  int error, ltype, wlocked;
482 
483  if (!doingcache) {
484  cnp->cn_flags &= ~MAKEENTRY;
485  return (0);
486  }
487 retry:
488  CACHE_RLOCK();
489  wlocked = 0;
490  numcalls++;
491  error = 0;
492 
493 retry_wlocked:
494  if (cnp->cn_nameptr[0] == '.') {
495  if (cnp->cn_namelen == 1) {
496  *vpp = dvp;
497  CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .",
498  dvp, cnp->cn_nameptr);
499  dothits++;
500  SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ".", *vpp);
501  if (tsp != NULL)
502  timespecclear(tsp);
503  if (ticksp != NULL)
504  *ticksp = ticks;
505  goto success;
506  }
507  if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
508  dotdothits++;
509  if (dvp->v_cache_dd == NULL) {
510  SDT_PROBE3(vfs, namecache, lookup, miss, dvp,
511  "..", NULL);
512  goto unlock;
513  }
514  if ((cnp->cn_flags & MAKEENTRY) == 0) {
515  if (!wlocked && !CACHE_UPGRADE_LOCK())
516  goto wlock;
517  if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT)
518  cache_zap(dvp->v_cache_dd);
519  dvp->v_cache_dd = NULL;
520  CACHE_WUNLOCK();
521  return (0);
522  }
523  ncp = dvp->v_cache_dd;
524  if (ncp->nc_flag & NCF_ISDOTDOT)
525  *vpp = ncp->nc_vp;
526  else
527  *vpp = ncp->nc_dvp;
528  /* Return failure if negative entry was found. */
529  if (*vpp == NULL)
530  goto negative_success;
531  CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..",
532  dvp, cnp->cn_nameptr, *vpp);
533  SDT_PROBE3(vfs, namecache, lookup, hit, dvp, "..",
534  *vpp);
535  cache_out_ts(ncp, tsp, ticksp);
536  if ((ncp->nc_flag & (NCF_ISDOTDOT | NCF_DTS)) ==
537  NCF_DTS && tsp != NULL)
538  *tsp = ((struct namecache_ts *)ncp)->
539  nc_dotdottime;
540  goto success;
541  }
542  }
543 
544  hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
545  hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
546  LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
547  numchecks++;
548  if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
549  !bcmp(nc_get_name(ncp), cnp->cn_nameptr, ncp->nc_nlen))
550  break;
551  }
552 
553  /* We failed to find an entry */
554  if (ncp == NULL) {
555  SDT_PROBE3(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr,
556  NULL);
557  if ((cnp->cn_flags & MAKEENTRY) == 0) {
558  nummisszap++;
559  } else {
560  nummiss++;
561  }
562  nchstats.ncs_miss++;
563  goto unlock;
564  }
565 
566  /* We don't want to have an entry, so dump it */
567  if ((cnp->cn_flags & MAKEENTRY) == 0) {
568  numposzaps++;
569  nchstats.ncs_badhits++;
570  if (!wlocked && !CACHE_UPGRADE_LOCK())
571  goto wlock;
572  cache_zap(ncp);
573  CACHE_WUNLOCK();
574  return (0);
575  }
576 
577  /* We found a "positive" match, return the vnode */
578  if (ncp->nc_vp) {
579  numposhits++;
580  nchstats.ncs_goodhits++;
581  *vpp = ncp->nc_vp;
582  CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
583  dvp, cnp->cn_nameptr, *vpp, ncp);
584  SDT_PROBE3(vfs, namecache, lookup, hit, dvp, nc_get_name(ncp),
585  *vpp);
586  cache_out_ts(ncp, tsp, ticksp);
587  goto success;
588  }
589 
590 negative_success:
591  /* We found a negative match, and want to create it, so purge */
592  if (cnp->cn_nameiop == CREATE) {
593  numnegzaps++;
594  nchstats.ncs_badhits++;
595  if (!wlocked && !CACHE_UPGRADE_LOCK())
596  goto wlock;
597  cache_zap(ncp);
598  CACHE_WUNLOCK();
599  return (0);
600  }
601 
602  if (!wlocked && !CACHE_UPGRADE_LOCK())
603  goto wlock;
604  numneghits++;
605  /*
606  * We found a "negative" match, so we shift it to the end of
607  * the "negative" cache entries queue to satisfy LRU. Also,
608  * check to see if the entry is a whiteout; indicate this to
609  * the componentname, if so.
610  */
611  TAILQ_REMOVE(&ncneg, ncp, nc_dst);
612  TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
613  nchstats.ncs_neghits++;
614  if (ncp->nc_flag & NCF_WHITE)
615  cnp->cn_flags |= ISWHITEOUT;
616  SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp,
617  nc_get_name(ncp));
618  cache_out_ts(ncp, tsp, ticksp);
619  CACHE_WUNLOCK();
620  return (ENOENT);
621 
622 wlock:
623  /*
624  * We need to update the cache after our lookup, so upgrade to
625  * a write lock and retry the operation.
626  */
627  CACHE_RUNLOCK();
628  CACHE_WLOCK();
629  numupgrades++;
630  wlocked = 1;
631  goto retry_wlocked;
632 
633 success:
634  /*
635  * On success we return a locked and ref'd vnode as per the lookup
636  * protocol.
637  */
638  if (dvp == *vpp) { /* lookup on "." */
639  VREF(*vpp);
640  if (wlocked)
641  CACHE_WUNLOCK();
642  else
643  CACHE_RUNLOCK();
644  /*
645  * When we lookup "." we still can be asked to lock it
646  * differently...
647  */
648  ltype = cnp->cn_lkflags & LK_TYPE_MASK;
649  if (ltype != VOP_ISLOCKED(*vpp)) {
650  if (ltype == LK_EXCLUSIVE) {
651  vn_lock(*vpp, LK_UPGRADE | LK_RETRY);
652  if ((*vpp)->v_iflag & VI_DOOMED) {
653  /* forced unmount */
654  vrele(*vpp);
655  *vpp = NULL;
656  return (ENOENT);
657  }
658  } else
659  vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY);
660  }
661  return (-1);
662  }
663  ltype = 0; /* silence gcc warning */
664  if (cnp->cn_flags & ISDOTDOT) {
665  ltype = VOP_ISLOCKED(dvp);
666  VOP_UNLOCK(dvp, 0);
667  }
668  VI_LOCK(*vpp);
669  if (wlocked)
670  CACHE_WUNLOCK();
671  else
672  CACHE_RUNLOCK();
673  error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread);
674  if (cnp->cn_flags & ISDOTDOT) {
675  vn_lock(dvp, ltype | LK_RETRY);
676  if (dvp->v_iflag & VI_DOOMED) {
677  if (error == 0)
678  vput(*vpp);
679  *vpp = NULL;
680  return (ENOENT);
681  }
682  }
683  if (error) {
684  *vpp = NULL;
685  goto retry;
686  }
687  if ((cnp->cn_flags & ISLASTCN) &&
688  (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) {
689  ASSERT_VOP_ELOCKED(*vpp, "cache_lookup");
690  }
691  return (-1);
692 
693 unlock:
694  if (wlocked)
695  CACHE_WUNLOCK();
696  else
697  CACHE_RUNLOCK();
698  return (0);
699 }
700 
701 /*
702  * Add an entry to the cache.
703  */
704 void
705 cache_enter_time(dvp, vp, cnp, tsp, dtsp)
706  struct vnode *dvp;
707  struct vnode *vp;
708  struct componentname *cnp;
709  struct timespec *tsp;
710  struct timespec *dtsp;
711 {
712  struct namecache *ncp, *n2;
713  struct namecache_ts *n3;
714  struct nchashhead *ncpp;
715  uint32_t hash;
716  int flag;
717  int hold;
718  int zap;
719  int len;
720 
721  CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr);
722  VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp,
723  ("cache_enter: Adding a doomed vnode"));
724  VNASSERT(dvp == NULL || (dvp->v_iflag & VI_DOOMED) == 0, dvp,
725  ("cache_enter: Doomed vnode used as src"));
726 
727  if (!doingcache)
728  return;
729 
730  /*
731  * Avoid blowout in namecache entries.
732  */
733  if (numcache >= desiredvnodes * ncsizefactor)
734  return;
735 
736  flag = 0;
737  if (cnp->cn_nameptr[0] == '.') {
738  if (cnp->cn_namelen == 1)
739  return;
740  if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
741  CACHE_WLOCK();
742  /*
743  * If dotdot entry already exists, just retarget it
744  * to new parent vnode, otherwise continue with new
745  * namecache entry allocation.
746  */
747  if ((ncp = dvp->v_cache_dd) != NULL &&
748  ncp->nc_flag & NCF_ISDOTDOT) {
749  KASSERT(ncp->nc_dvp == dvp,
750  ("wrong isdotdot parent"));
751  if (ncp->nc_vp != NULL) {
752  TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst,
753  ncp, nc_dst);
754  } else {
755  TAILQ_REMOVE(&ncneg, ncp, nc_dst);
756  numneg--;
757  }
758  if (vp != NULL) {
759  TAILQ_INSERT_HEAD(&vp->v_cache_dst,
760  ncp, nc_dst);
761  } else {
762  TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
763  numneg++;
764  }
765  ncp->nc_vp = vp;
766  CACHE_WUNLOCK();
767  return;
768  }
769  dvp->v_cache_dd = NULL;
770  SDT_PROBE3(vfs, namecache, enter, done, dvp, "..", vp);
771  CACHE_WUNLOCK();
772  flag = NCF_ISDOTDOT;
773  }
774  }
775 
776  hold = 0;
777  zap = 0;
778 
779  /*
780  * Calculate the hash key and setup as much of the new
781  * namecache entry as possible before acquiring the lock.
782  */
783  ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
784  ncp->nc_vp = vp;
785  ncp->nc_dvp = dvp;
786  ncp->nc_flag = flag;
787  if (tsp != NULL) {
788  n3 = (struct namecache_ts *)ncp;
789  n3->nc_time = *tsp;
790  n3->nc_ticks = ticks;
791  n3->nc_flag |= NCF_TS;
792  if (dtsp != NULL) {
793  n3->nc_dotdottime = *dtsp;
794  n3->nc_flag |= NCF_DTS;
795  }
796  }
797  len = ncp->nc_nlen = cnp->cn_namelen;
798  hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
799  strlcpy(nc_get_name(ncp), cnp->cn_nameptr, len + 1);
800  hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
801  CACHE_WLOCK();
802 
803  /*
804  * See if this vnode or negative entry is already in the cache
805  * with this name. This can happen with concurrent lookups of
806  * the same path name.
807  */
808  ncpp = NCHHASH(hash);
809  LIST_FOREACH(n2, ncpp, nc_hash) {
810  if (n2->nc_dvp == dvp &&
811  n2->nc_nlen == cnp->cn_namelen &&
812  !bcmp(nc_get_name(n2), cnp->cn_nameptr, n2->nc_nlen)) {
813  if (tsp != NULL) {
814  KASSERT((n2->nc_flag & NCF_TS) != 0,
815  ("no NCF_TS"));
816  n3 = (struct namecache_ts *)n2;
817  n3->nc_time =
818  ((struct namecache_ts *)ncp)->nc_time;
819  n3->nc_ticks =
820  ((struct namecache_ts *)ncp)->nc_ticks;
821  if (dtsp != NULL) {
822  n3->nc_dotdottime =
823  ((struct namecache_ts *)ncp)->
824  nc_dotdottime;
825  n3->nc_flag |= NCF_DTS;
826  }
827  }
828  CACHE_WUNLOCK();
829  cache_free(ncp);
830  return;
831  }
832  }
833 
834  if (flag == NCF_ISDOTDOT) {
835  /*
836  * See if we are trying to add .. entry, but some other lookup
837  * has populated v_cache_dd pointer already.
838  */
839  if (dvp->v_cache_dd != NULL) {
840  CACHE_WUNLOCK();
841  cache_free(ncp);
842  return;
843  }
844  KASSERT(vp == NULL || vp->v_type == VDIR,
845  ("wrong vnode type %p", vp));
846  dvp->v_cache_dd = ncp;
847  }
848 
849  numcache++;
850  if (!vp) {
851  numneg++;
852  if (cnp->cn_flags & ISWHITEOUT)
853  ncp->nc_flag |= NCF_WHITE;
854  } else if (vp->v_type == VDIR) {
855  if (flag != NCF_ISDOTDOT) {
856  /*
857  * For this case, the cache entry maps both the
858  * directory name in it and the name ".." for the
859  * directory's parent.
860  */
861  if ((n2 = vp->v_cache_dd) != NULL &&
862  (n2->nc_flag & NCF_ISDOTDOT) != 0)
863  cache_zap(n2);
864  vp->v_cache_dd = ncp;
865  }
866  } else {
867  vp->v_cache_dd = NULL;
868  }
869 
870  /*
871  * Insert the new namecache entry into the appropriate chain
872  * within the cache entries table.
873  */
874  LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
875  if (flag != NCF_ISDOTDOT) {
876  if (LIST_EMPTY(&dvp->v_cache_src)) {
877  hold = 1;
878  numcachehv++;
879  }
880  LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
881  }
882 
883  /*
884  * If the entry is "negative", we place it into the
885  * "negative" cache queue, otherwise, we place it into the
886  * destination vnode's cache entries queue.
887  */
888  if (vp) {
889  TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
890  SDT_PROBE3(vfs, namecache, enter, done, dvp, nc_get_name(ncp),
891  vp);
892  } else {
893  TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
894  SDT_PROBE2(vfs, namecache, enter_negative, done, dvp,
895  nc_get_name(ncp));
896  }
897  if (numneg * ncnegfactor > numcache) {
898  ncp = TAILQ_FIRST(&ncneg);
899  KASSERT(ncp->nc_vp == NULL, ("ncp %p vp %p on ncneg",
900  ncp, ncp->nc_vp));
901  zap = 1;
902  }
903  if (hold)
904  vhold(dvp);
905  if (zap)
906  cache_zap(ncp);
907  CACHE_WUNLOCK();
908 }
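/*
 * Callers that have no timestamps to record reach this function through
 * the plain cache_enter() wrapper (see the ABI compat shims at the end
 * of this file), which passes NULL for both timespec arguments.
 */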
909 
910 /*
911  * Name cache initialization, from vfs_init() when we are booting
912  */
913 static void
914 nchinit(void *dummy __unused)
915 {
916 
917  TAILQ_INIT(&ncneg);
918 
919  cache_zone_small = uma_zcreate("S VFS Cache",
920  sizeof(struct namecache) + CACHE_PATH_CUTOFF + 1,
921  NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
922  cache_zone_small_ts = uma_zcreate("STS VFS Cache",
923  sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF + 1,
924  NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
925  cache_zone_large = uma_zcreate("L VFS Cache",
926  sizeof(struct namecache) + NAME_MAX + 1,
927  NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
928  cache_zone_large_ts = uma_zcreate("LTS VFS Cache",
929  sizeof(struct namecache_ts) + NAME_MAX + 1,
930  NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
931 
932  nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
933 }
934 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL);
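/*
 * hashinit() rounds the table up to a power of two and returns the
 * corresponding mask in nchash, which is why NCHHASH() can pick a chain
 * with "hash & nchash" and why nchash is a maximum index rather than a
 * count (cf. the hashstat sysctl handlers above).
 */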
935 
936 
937 /*
938  * Invalidate all entries to a particular vnode.
939  */
940 void
941 cache_purge(vp)
942  struct vnode *vp;
943 {
944 
945  CTR1(KTR_VFS, "cache_purge(%p)", vp);
946  SDT_PROBE1(vfs, namecache, purge, done, vp);
947  CACHE_WLOCK();
948  while (!LIST_EMPTY(&vp->v_cache_src))
949  cache_zap(LIST_FIRST(&vp->v_cache_src));
950  while (!TAILQ_EMPTY(&vp->v_cache_dst))
951  cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
952  if (vp->v_cache_dd != NULL) {
953  KASSERT(vp->v_cache_dd->nc_flag & NCF_ISDOTDOT,
954  ("lost dotdot link"));
955  cache_zap(vp->v_cache_dd);
956  }
957  KASSERT(vp->v_cache_dd == NULL, ("incomplete purge"));
958  CACHE_WUNLOCK();
959 }
960 
961 /*
962  * Invalidate all negative entries for a particular directory vnode.
963  */
964 void
965 cache_purge_negative(vp)
966  struct vnode *vp;
967 {
968  struct namecache *cp, *ncp;
969 
970  CTR1(KTR_VFS, "cache_purge_negative(%p)", vp);
971  SDT_PROBE1(vfs, namecache, purge_negative, done, vp);
972  CACHE_WLOCK();
973  LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) {
974  if (cp->nc_vp == NULL)
975  cache_zap(cp);
976  }
977  CACHE_WUNLOCK();
978 }
979 
980 /*
981  * Flush all entries referencing a particular filesystem.
982  */
983 void
984 cache_purgevfs(mp)
985  struct mount *mp;
986 {
987  struct nchashhead *ncpp;
988  struct namecache *ncp, *nnp;
989 
990  /* Scan hash tables for applicable entries */
991  SDT_PROBE1(vfs, namecache, purgevfs, done, mp);
992  CACHE_WLOCK();
993  for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
994  LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) {
995  if (ncp->nc_dvp->v_mount == mp)
996  cache_zap(ncp);
997  }
998  }
999  CACHE_WUNLOCK();
1000 }
1001 
1002 /*
1003  * Perform canonical checks and cache lookup and pass on to filesystem
1004  * through the vop_cachedlookup only if needed.
1005  */
1006 
1007 int
1008 vfs_cache_lookup(ap)
1009  struct vop_lookup_args /* {
1010  struct vnode *a_dvp;
1011  struct vnode **a_vpp;
1012  struct componentname *a_cnp;
1013  } */ *ap;
1014 {
1015  struct vnode *dvp;
1016  int error;
1017  struct vnode **vpp = ap->a_vpp;
1018  struct componentname *cnp = ap->a_cnp;
1019  struct ucred *cred = cnp->cn_cred;
1020  int flags = cnp->cn_flags;
1021  struct thread *td = cnp->cn_thread;
1022 
1023  *vpp = NULL;
1024  dvp = ap->a_dvp;
1025 
1026  if (dvp->v_type != VDIR)
1027  return (ENOTDIR);
1028 
1029  if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
1030  (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
1031  return (EROFS);
1032 
1033  error = VOP_ACCESS(dvp, VEXEC, cred, td);
1034  if (error)
1035  return (error);
1036 
1037  error = cache_lookup(dvp, vpp, cnp);
1038  if (error == 0)
1039  return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
1040  if (error == -1)
1041  return (0);
1042  return (error);
1043 }
1044 
1045 /*
1046  * XXX All of these sysctls would probably be more productive dead.
1047  */
1048 static int disablecwd;
1049 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
1050  "Disable the getcwd syscall");
1051 
1052 /* Implementation of the getcwd syscall. */
1053 int
1054 sys___getcwd(td, uap)
1055  struct thread *td;
1056  struct __getcwd_args *uap;
1057 {
1058 
1059  return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen));
1060 }
1061 
1062 int
1063 kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg, u_int buflen)
1064 {
1065  char *bp, *tmpbuf;
1066  struct filedesc *fdp;
1067  struct vnode *cdir, *rdir;
1068  int error, vfslocked;
1069 
1070  if (disablecwd)
1071  return (ENODEV);
1072  if (buflen < 2)
1073  return (EINVAL);
1074  if (buflen > MAXPATHLEN)
1075  buflen = MAXPATHLEN;
1076 
1077  tmpbuf = malloc(buflen, M_TEMP, M_WAITOK);
1078  fdp = td->td_proc->p_fd;
1079  FILEDESC_SLOCK(fdp);
1080  cdir = fdp->fd_cdir;
1081  VREF(cdir);
1082  rdir = fdp->fd_rdir;
1083  VREF(rdir);
1084  FILEDESC_SUNLOCK(fdp);
1085  error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen);
1086  vfslocked = VFS_LOCK_GIANT(rdir->v_mount);
1087  vrele(rdir);
1088  VFS_UNLOCK_GIANT(vfslocked);
1089  vfslocked = VFS_LOCK_GIANT(cdir->v_mount);
1090  vrele(cdir);
1091  VFS_UNLOCK_GIANT(vfslocked);
1092 
1093  if (!error) {
1094  if (bufseg == UIO_SYSSPACE)
1095  bcopy(bp, buf, strlen(bp) + 1);
1096  else
1097  error = copyout(bp, buf, strlen(bp) + 1);
1098 #ifdef KTRACE
1099  if (KTRPOINT(curthread, KTR_NAMEI))
1100  ktrnamei(bp);
1101 #endif
1102  }
1103  free(tmpbuf, M_TEMP);
1104  return (error);
1105 }
1106 
1107 /*
1108  * Thus begins the fullpath magic.
1109  */
1110 
1111 #undef STATNODE
1112 #define STATNODE(name, descr) \
1113  static u_int name; \
1114  SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, descr)
1115 
1116 static int disablefullpath;
1117 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
1118  "Disable the vn_fullpath function");
1119 
1120 /* These count for kern___getcwd(), too. */
1121 STATNODE(numfullpathcalls, "Number of fullpath search calls");
1122 STATNODE(numfullpathfail1, "Number of fullpath search errors (ENOTDIR)");
1123 STATNODE(numfullpathfail2,
1124  "Number of fullpath search errors (VOP_VPTOCNP failures)");
1125 STATNODE(numfullpathfail4, "Number of fullpath search errors (ENOMEM)");
1126 STATNODE(numfullpathfound, "Number of successful fullpath calls");
1127 
1128 /*
1129  * Retrieve the full filesystem path that corresponds to a vnode from the name
1130  * cache (if available)
1131  */
1132 int
1133 vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
1134 {
1135  char *buf;
1136  struct filedesc *fdp;
1137  struct vnode *rdir;
1138  int error, vfslocked;
1139 
1140  if (disablefullpath)
1141  return (ENODEV);
1142  if (vn == NULL)
1143  return (EINVAL);
1144 
1145  buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
1146  fdp = td->td_proc->p_fd;
1147  FILEDESC_SLOCK(fdp);
1148  rdir = fdp->fd_rdir;
1149  VREF(rdir);
1150  FILEDESC_SUNLOCK(fdp);
1151  error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN);
1152  vfslocked = VFS_LOCK_GIANT(rdir->v_mount);
1153  vrele(rdir);
1154  VFS_UNLOCK_GIANT(vfslocked);
1155 
1156  if (!error)
1157  *freebuf = buf;
1158  else
1159  free(buf, M_TEMP);
1160  return (error);
1161 }
1162 
1163 /*
1164  * This function is similar to vn_fullpath, but it attempts to lookup the
1165  * pathname relative to the global root mount point. This is required for the
1166  * auditing sub-system, as audited pathnames must be absolute, relative to the
1167  * global root mount point.
1168  */
1169 int
1170 vn_fullpath_global(struct thread *td, struct vnode *vn,
1171  char **retbuf, char **freebuf)
1172 {
1173  char *buf;
1174  int error;
1175 
1176  if (disablefullpath)
1177  return (ENODEV);
1178  if (vn == NULL)
1179  return (EINVAL);
1180  buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
1181  error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN);
1182  if (!error)
1183  *freebuf = buf;
1184  else
1185  free(buf, M_TEMP);
1186  return (error);
1187 }
1188 
1189 int
1190 vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen)
1191 {
1192  int error;
1193 
1194  CACHE_RLOCK();
1195  error = vn_vptocnp_locked(vp, cred, buf, buflen);
1196  if (error == 0)
1197  CACHE_RUNLOCK();
1198  return (error);
1199 }
1200 
1201 static int
1202 vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
1203  u_int *buflen)
1204 {
1205  struct vnode *dvp;
1206  struct namecache *ncp;
1207  int error, vfslocked;
1208 
1209  TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) {
1210  if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
1211  break;
1212  }
1213  if (ncp != NULL) {
1214  if (*buflen < ncp->nc_nlen) {
1215  CACHE_RUNLOCK();
1216  vfslocked = VFS_LOCK_GIANT((*vp)->v_mount);
1217  vrele(*vp);
1218  VFS_UNLOCK_GIANT(vfslocked);
1219  numfullpathfail4++;
1220  error = ENOMEM;
1221  SDT_PROBE3(vfs, namecache, fullpath, return, error,
1222  vp, NULL);
1223  return (error);
1224  }
1225  *buflen -= ncp->nc_nlen;
1226  memcpy(buf + *buflen, nc_get_name(ncp), ncp->nc_nlen);
1227  SDT_PROBE3(vfs, namecache, fullpath, hit, ncp->nc_dvp,
1228  nc_get_name(ncp), vp);
1229  dvp = *vp;
1230  *vp = ncp->nc_dvp;
1231  vref(*vp);
1232  CACHE_RUNLOCK();
1233  vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
1234  vrele(dvp);
1235  VFS_UNLOCK_GIANT(vfslocked);
1236  CACHE_RLOCK();
1237  return (0);
1238  }
1239  SDT_PROBE1(vfs, namecache, fullpath, miss, vp);
1240 
1241  CACHE_RUNLOCK();
1242  vfslocked = VFS_LOCK_GIANT((*vp)->v_mount);
1243  vn_lock(*vp, LK_SHARED | LK_RETRY);
1244  error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen);
1245  vput(*vp);
1246  VFS_UNLOCK_GIANT(vfslocked);
1247  if (error) {
1248  numfullpathfail2++;
1249  SDT_PROBE3(vfs, namecache, fullpath, return, error, vp, NULL);
1250  return (error);
1251  }
1252 
1253  *vp = dvp;
1254  CACHE_RLOCK();
1255  if (dvp->v_iflag & VI_DOOMED) {
1256  /* forced unmount */
1257  CACHE_RUNLOCK();
1258  vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
1259  vrele(dvp);
1260  VFS_UNLOCK_GIANT(vfslocked);
1261  error = ENOENT;
1262  SDT_PROBE3(vfs, namecache, fullpath, return, error, vp, NULL);
1263  return (error);
1264  }
1265  /*
1266  * *vp has its use count incremented still.
1267  */
1268 
1269  return (0);
1270 }
1271 
1272 /*
1273  * The magic behind kern___getcwd() and vn_fullpath().
1274  */
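/*
 * Roughly: the path is assembled backwards. Starting from vp, each call
 * to vn_vptocnp_locked() prepends one name component into buf and steps
 * vp to its parent; mount points are crossed via mnt_vnodecovered and
 * the walk stops at rdir or rootvnode, leaving *retbuf pointing at the
 * first byte of the assembled path.
 */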
1275 static int
1276 vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
1277  char *buf, char **retbuf, u_int buflen)
1278 {
1279  int error, slash_prefixed, vfslocked;
1280 #ifdef KDTRACE_HOOKS
1281  struct vnode *startvp = vp;
1282 #endif
1283  struct vnode *vp1;
1284 
1285  buflen--;
1286  buf[buflen] = '\0';
1287  error = 0;
1288  slash_prefixed = 0;
1289 
1290  SDT_PROBE1(vfs, namecache, fullpath, entry, vp);
1291  numfullpathcalls++;
1292  vref(vp);
1293  CACHE_RLOCK();
1294  if (vp->v_type != VDIR) {
1295  error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
1296  if (error)
1297  return (error);
1298  if (buflen == 0) {
1299  CACHE_RUNLOCK();
1300  vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1301  vrele(vp);
1302  VFS_UNLOCK_GIANT(vfslocked);
1303  return (ENOMEM);
1304  }
1305  buf[--buflen] = '/';
1306  slash_prefixed = 1;
1307  }
1308  while (vp != rdir && vp != rootvnode) {
1309  if (vp->v_vflag & VV_ROOT) {
1310  if (vp->v_iflag & VI_DOOMED) { /* forced unmount */
1311  CACHE_RUNLOCK();
1312  vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1313  vrele(vp);
1314  VFS_UNLOCK_GIANT(vfslocked);
1315  error = ENOENT;
1316  SDT_PROBE3(vfs, namecache, fullpath, return,
1317  error, vp, NULL);
1318  break;
1319  }
1320  vp1 = vp->v_mount->mnt_vnodecovered;
1321  vref(vp1);
1322  CACHE_RUNLOCK();
1323  vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1324  vrele(vp);
1325  VFS_UNLOCK_GIANT(vfslocked);
1326  vp = vp1;
1327  CACHE_RLOCK();
1328  continue;
1329  }
1330  if (vp->v_type != VDIR) {
1331  CACHE_RUNLOCK();
1332  vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1333  vrele(vp);
1334  VFS_UNLOCK_GIANT(vfslocked);
1335  numfullpathfail1++;
1336  error = ENOTDIR;
1337  SDT_PROBE3(vfs, namecache, fullpath, return,
1338  error, vp, NULL);
1339  break;
1340  }
1341  error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
1342  if (error)
1343  break;
1344  if (buflen == 0) {
1345  CACHE_RUNLOCK();
1346  vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1347  vrele(vp);
1348  VFS_UNLOCK_GIANT(vfslocked);
1349  error = ENOMEM;
1350  SDT_PROBE3(vfs, namecache, fullpath, return, error,
1351  startvp, NULL);
1352  break;
1353  }
1354  buf[--buflen] = '/';
1355  slash_prefixed = 1;
1356  }
1357  if (error)
1358  return (error);
1359  if (!slash_prefixed) {
1360  if (buflen == 0) {
1361  CACHE_RUNLOCK();
1362  vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1363  vrele(vp);
1364  VFS_UNLOCK_GIANT(vfslocked);
1365  numfullpathfail4++;
1366  SDT_PROBE3(vfs, namecache, fullpath, return, ENOMEM,
1367  startvp, NULL);
1368  return (ENOMEM);
1369  }
1370  buf[--buflen] = '/';
1371  }
1372  numfullpathfound++;
1373  CACHE_RUNLOCK();
1374  vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1375  vrele(vp);
1376  VFS_UNLOCK_GIANT(vfslocked);
1377 
1378  SDT_PROBE3(vfs, namecache, fullpath, return, 0, startvp, buf + buflen);
1379  *retbuf = buf + buflen;
1380  return (0);
1381 }
1382 
1383 struct vnode *
1384 vn_dir_dd_ino(struct vnode *vp)
1385 {
1386  struct namecache *ncp;
1387  struct vnode *ddvp;
1388 
1389  ASSERT_VOP_LOCKED(vp, "vn_dir_dd_ino");
1390  CACHE_RLOCK();
1391  TAILQ_FOREACH(ncp, &(vp->v_cache_dst), nc_dst) {
1392  if ((ncp->nc_flag & NCF_ISDOTDOT) != 0)
1393  continue;
1394  ddvp = ncp->nc_dvp;
1395  VI_LOCK(ddvp);
1396  CACHE_RUNLOCK();
1397  if (vget(ddvp, LK_INTERLOCK | LK_SHARED | LK_NOWAIT, curthread))
1398  return (NULL);
1399  return (ddvp);
1400  }
1401  CACHE_RUNLOCK();
1402  return (NULL);
1403 }
1404 
1405 int
1406 vn_commname(struct vnode *vp, char *buf, u_int buflen)
1407 {
1408  struct namecache *ncp;
1409  int l;
1410 
1411  CACHE_RLOCK();
1412  TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst)
1413  if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
1414  break;
1415  if (ncp == NULL) {
1416  CACHE_RUNLOCK();
1417  return (ENOENT);
1418  }
1419  l = min(ncp->nc_nlen, buflen - 1);
1420  memcpy(buf, nc_get_name(ncp), l);
1421  CACHE_RUNLOCK();
1422  buf[l] = '\0';
1423  return (0);
1424 }
1425 
1426 /* ABI compat shims for old kernel modules. */
1427 #undef cache_enter
1428 #undef cache_lookup
1429 
1430 void cache_enter(struct vnode *dvp, struct vnode *vp,
1431  struct componentname *cnp);
1432 int cache_lookup(struct vnode *dvp, struct vnode **vpp,
1433  struct componentname *cnp);
1434 
1435 void
1436 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
1437 {
1438 
1439  cache_enter_time(dvp, vp, cnp, NULL, NULL);
1440 }
1441 
1442 int
1443 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
1444 {
1445 
1446  return (cache_lookup_times(dvp, vpp, cnp, NULL, NULL));
1447 }
1448 
1449 /*
1450  * This function updates the path string to the vnode's full global path
1451  * and checks the size of the new path string against the pathlen argument.
1452  *
1453  * Requires a locked, referenced vnode and GIANT lock held.
1454  * Vnode is re-locked on success or ENODEV, otherwise unlocked.
1455  *
1456  * If sysctl debug.disablefullpath is set, ENODEV is returned,
1457  * the vnode is left locked and the path remains untouched.
1458  *
1459  * If vp is a directory, the call to vn_fullpath_global() always succeeds
1460  * because it falls back to the ".." lookup if the namecache lookup fails.
1461  */
1462 int
1463 vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path,
1464  u_int pathlen)
1465 {
1466  struct nameidata nd;
1467  struct vnode *vp1;
1468  char *rpath, *fbuf;
1469  int error, vfslocked;
1470 
1471  VFS_ASSERT_GIANT(vp->v_mount);
1472  ASSERT_VOP_ELOCKED(vp, __func__);
1473 
1474  /* Return ENODEV if sysctl debug.disablefullpath==1 */
1475  if (disablefullpath)
1476  return (ENODEV);
1477 
1478  /* Construct global filesystem path from vp. */
1479  VOP_UNLOCK(vp, 0);
1480  error = vn_fullpath_global(td, vp, &rpath, &fbuf);
1481 
1482  if (error != 0) {
1483  vrele(vp);
1484  return (error);
1485  }
1486 
1487  if (strlen(rpath) >= pathlen) {
1488  vrele(vp);
1489  error = ENAMETOOLONG;
1490  goto out;
1491  }
1492 
1493  /*
1494  * Re-lookup the vnode by path to detect a possible rename.
1495  * As a side effect, the vnode is relocked.
1496  * If vnode was renamed, return ENOENT.
1497  */
1498  NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
1499  UIO_SYSSPACE, path, td);
1500  error = namei(&nd);
1501  if (error != 0) {
1502  vrele(vp);
1503  goto out;
1504  }
1505  vfslocked = NDHASGIANT(&nd);
1506  NDFREE(&nd, NDF_ONLY_PNBUF);
1507  vp1 = nd.ni_vp;
1508  vrele(vp);
1509  if (vp1 == vp)
1510  strcpy(path, rpath);
1511  else {
1512  vput(vp1);
1513  error = ENOENT;
1514  }
1515  VFS_UNLOCK_GIANT(vfslocked);
1516 
1517 out:
1518  free(fbuf, M_TEMP);
1519  return (error);
1520 }