FreeBSD kernel kern code
kern_ktrace.c
Go to the documentation of this file.
1 /*-
2  * Copyright (c) 1989, 1993
3  * The Regents of the University of California.
4  * Copyright (c) 2005 Robert N. M. Watson
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in the
14  * documentation and/or other materials provided with the distribution.
15  * 4. Neither the name of the University nor the names of its contributors
16  * may be used to endorse or promote products derived from this software
17  * without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$BSDSUniX$");
36 
37 #include "opt_ktrace.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/fcntl.h>
42 #include <sys/kernel.h>
43 #include <sys/kthread.h>
44 #include <sys/lock.h>
45 #include <sys/mutex.h>
46 #include <sys/malloc.h>
47 #include <sys/mount.h>
48 #include <sys/namei.h>
49 #include <sys/priv.h>
50 #include <sys/proc.h>
51 #include <sys/unistd.h>
52 #include <sys/vnode.h>
53 #include <sys/socket.h>
54 #include <sys/stat.h>
55 #include <sys/ktrace.h>
56 #include <sys/sx.h>
57 #include <sys/sysctl.h>
58 #include <sys/sysent.h>
59 #include <sys/syslog.h>
60 #include <sys/sysproto.h>
61 
62 #include <security/mac/mac_framework.h>
63 
64 /*
65  * The ktrace facility allows the tracing of certain key events in user space
66  * processes, such as system calls, signal delivery, context switches, and
67  * user generated events using utrace(2). It works by streaming event
68  * records and data to a vnode associated with the process using the
69  * ktrace(2) system call. In general, records can be written directly from
70  * the context that generates the event. One important exception to this is
71  * during a context switch, where sleeping is not permitted. To handle this
72  * case, trace events are generated using in-kernel ktr_request records, and
73  * then delivered to disk at a convenient moment -- either immediately, the
74  * next traceable event, at system call return, or at process exit.
75  *
76  * When dealing with multiple threads or processes writing to the same event
77  * log, ordering guarantees are weak: specifically, if an event has multiple
78  * records (i.e., system call enter and return), they may be interlaced with
79  * records from another event. Process and thread ID information is provided
80  * in the record, and user applications can de-interlace events if required.
81  */
82 
83 static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");
84 
85 #ifdef KTRACE
86 
87 FEATURE(ktrace, "Kernel support for system-call tracing");
88 
89 #ifndef KTRACE_REQUEST_POOL
90 #define KTRACE_REQUEST_POOL 100
91 #endif
92 
93 struct ktr_request {
94  struct ktr_header ktr_header;
95  void *ktr_buffer;
96  union {
97  struct ktr_proc_ctor ktr_proc_ctor;
98  struct ktr_syscall ktr_syscall;
99  struct ktr_sysret ktr_sysret;
100  struct ktr_genio ktr_genio;
101  struct ktr_psig ktr_psig;
102  struct ktr_csw ktr_csw;
103  struct ktr_fault ktr_fault;
104  struct ktr_faultend ktr_faultend;
105  } ktr_data;
106  STAILQ_ENTRY(ktr_request) ktr_list;
107 };
108 
109 static int data_lengths[] = {
110  0, /* none */
111  offsetof(struct ktr_syscall, ktr_args), /* KTR_SYSCALL */
112  sizeof(struct ktr_sysret), /* KTR_SYSRET */
113  0, /* KTR_NAMEI */
114  sizeof(struct ktr_genio), /* KTR_GENIO */
115  sizeof(struct ktr_psig), /* KTR_PSIG */
116  sizeof(struct ktr_csw), /* KTR_CSW */
117  0, /* KTR_USER */
118  0, /* KTR_STRUCT */
119  0, /* KTR_SYSCTL */
120  sizeof(struct ktr_proc_ctor), /* KTR_PROCCTOR */
121  0, /* KTR_PROCDTOR */
122  0, /* unused */
123  sizeof(struct ktr_fault), /* KTR_FAULT */
124  sizeof(struct ktr_faultend), /* KTR_FAULTEND */
125 };
126 
127 static STAILQ_HEAD(, ktr_request) ktr_free;
128 
129 static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");
130 
131 static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
132 TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);
133 
134 static u_int ktr_geniosize = PAGE_SIZE;
135 TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
136 SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
137  0, "Maximum size of genio event payload");
138 
139 static int print_message = 1;
140 static struct mtx ktrace_mtx;
141 static struct sx ktrace_sx;
142 
143 static void ktrace_init(void *dummy);
144 static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
145 static u_int ktrace_resize_pool(u_int oldsize, u_int newsize);
146 static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type);
147 static struct ktr_request *ktr_getrequest(int type);
148 static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
149 static void ktr_freeproc(struct proc *p, struct ucred **uc,
150  struct vnode **vp);
151 static void ktr_freerequest(struct ktr_request *req);
152 static void ktr_freerequest_locked(struct ktr_request *req);
153 static void ktr_writerequest(struct thread *td, struct ktr_request *req);
154 static int ktrcanset(struct thread *,struct proc *);
155 static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
156 static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
157 static void ktrprocctor_entered(struct thread *, struct proc *);
158 
159 /*
160  * ktrace itself generates events, such as context switches, which we do not
161  * wish to trace. Maintain a flag, TDP_INKTRACE, on each thread to determine
162  * whether or not it is in a region where tracing of events should be
163  * suppressed.
164  */
165 static void
166 ktrace_enter(struct thread *td)
167 {
168 
169  KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
170  td->td_pflags |= TDP_INKTRACE;
171 }
172 
173 static void
174 ktrace_exit(struct thread *td)
175 {
176 
177  KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
178  td->td_pflags &= ~TDP_INKTRACE;
179 }
180 
181 static void
182 ktrace_assert(struct thread *td)
183 {
184 
185  KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
186 }
187 
188 static void
189 ktrace_init(void *dummy)
190 {
191  struct ktr_request *req;
192  int i;
193 
194  mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
195  sx_init(&ktrace_sx, "ktrace_sx");
196  STAILQ_INIT(&ktr_free);
197  for (i = 0; i < ktr_requestpool; i++) {
198  req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
199  STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
200  }
201 }
202 SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);
203 
204 static int
205 sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
206 {
207  struct thread *td;
208  u_int newsize, oldsize, wantsize;
209  int error;
210 
211  /* Handle easy read-only case first to avoid warnings from GCC. */
212  if (!req->newptr) {
213  oldsize = ktr_requestpool;
214  return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
215  }
216 
217  error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
218  if (error)
219  return (error);
220  td = curthread;
221  ktrace_enter(td);
222  oldsize = ktr_requestpool;
223  newsize = ktrace_resize_pool(oldsize, wantsize);
224  ktrace_exit(td);
225  error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
226  if (error)
227  return (error);
228  if (wantsize > oldsize && newsize < wantsize)
229  return (ENOSPC);
230  return (0);
231 }
232 SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW,
233  &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU",
234  "Pool buffer size for ktrace(1)");
235 
236 static u_int
237 ktrace_resize_pool(u_int oldsize, u_int newsize)
238 {
239  STAILQ_HEAD(, ktr_request) ktr_new;
240  struct ktr_request *req;
241  int bound;
242 
243  print_message = 1;
244  bound = newsize - oldsize;
245  if (bound == 0)
246  return (ktr_requestpool);
247  if (bound < 0) {
248  mtx_lock(&ktrace_mtx);
249  /* Shrink pool down to newsize if possible. */
250  while (bound++ < 0) {
251  req = STAILQ_FIRST(&ktr_free);
252  if (req == NULL)
253  break;
254  STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
255  ktr_requestpool--;
256  free(req, M_KTRACE);
257  }
258  } else {
259  /* Grow pool up to newsize. */
260  STAILQ_INIT(&ktr_new);
261  while (bound-- > 0) {
262  req = malloc(sizeof(struct ktr_request), M_KTRACE,
263  M_WAITOK);
264  STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list);
265  }
266  mtx_lock(&ktrace_mtx);
267  STAILQ_CONCAT(&ktr_free, &ktr_new);
268  ktr_requestpool += (newsize - oldsize);
269  }
270  mtx_unlock(&ktrace_mtx);
271  return (ktr_requestpool);
272 }
273 
274 /* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */
275 CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) ==
276  (sizeof((struct thread *)NULL)->td_name));
277 
278 static struct ktr_request *
279 ktr_getrequest_entered(struct thread *td, int type)
280 {
281  struct ktr_request *req;
282  struct proc *p = td->td_proc;
283  int pm;
284 
285  mtx_lock(&ktrace_mtx);
286  if (!KTRCHECK(td, type)) {
287  mtx_unlock(&ktrace_mtx);
288  return (NULL);
289  }
290  req = STAILQ_FIRST(&ktr_free);
291  if (req != NULL) {
292  STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
293  req->ktr_header.ktr_type = type;
294  if (p->p_traceflag & KTRFAC_DROP) {
295  req->ktr_header.ktr_type |= KTR_DROP;
296  p->p_traceflag &= ~KTRFAC_DROP;
297  }
298  mtx_unlock(&ktrace_mtx);
299  microtime(&req->ktr_header.ktr_time);
300  req->ktr_header.ktr_pid = p->p_pid;
301  req->ktr_header.ktr_tid = td->td_tid;
302  bcopy(td->td_name, req->ktr_header.ktr_comm,
303  sizeof(req->ktr_header.ktr_comm));
304  req->ktr_buffer = NULL;
305  req->ktr_header.ktr_len = 0;
306  } else {
307  p->p_traceflag |= KTRFAC_DROP;
308  pm = print_message;
309  print_message = 0;
310  mtx_unlock(&ktrace_mtx);
311  if (pm)
312  printf("Out of ktrace request objects.\n");
313  }
314  return (req);
315 }
316 
317 static struct ktr_request *
318 ktr_getrequest(int type)
319 {
320  struct thread *td = curthread;
321  struct ktr_request *req;
322 
323  ktrace_enter(td);
324  req = ktr_getrequest_entered(td, type);
325  if (req == NULL)
326  ktrace_exit(td);
327 
328  return (req);
329 }
330 
331 /*
332  * Some trace generation environments don't permit direct access to VFS,
333  * such as during a context switch where sleeping is not allowed. Under these
334  * circumstances, queue a request to the thread to be written asynchronously
335  * later.
336  */
337 static void
338 ktr_enqueuerequest(struct thread *td, struct ktr_request *req)
339 {
340 
341  mtx_lock(&ktrace_mtx);
342  STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
343  mtx_unlock(&ktrace_mtx);
344 }
345 
346 /*
347  * Drain any pending ktrace records from the per-thread queue to disk. This
348  * is used both internally before committing other records, and also on
349  * system call return. We drain all the ones we can find at the time when
350  * drain is requested, but don't keep draining after that as those events
351  * may be approximately "after" the current event.
352  */
353 static void
354 ktr_drain(struct thread *td)
355 {
356  struct ktr_request *queued_req;
357  STAILQ_HEAD(, ktr_request) local_queue;
358 
359  ktrace_assert(td);
360  sx_assert(&ktrace_sx, SX_XLOCKED);
361 
362  STAILQ_INIT(&local_queue);
363 
364  if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) {
365  mtx_lock(&ktrace_mtx);
366  STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr);
367  mtx_unlock(&ktrace_mtx);
368 
369  while ((queued_req = STAILQ_FIRST(&local_queue))) {
370  STAILQ_REMOVE_HEAD(&local_queue, ktr_list);
371  ktr_writerequest(td, queued_req);
372  ktr_freerequest(queued_req);
373  }
374  }
375 }
376 
377 /*
378  * Submit a trace record for immediate commit to disk -- to be used only
379  * where entering VFS is OK. First drain any pending records that may have
380  * been cached in the thread.
381  */
382 static void
383 ktr_submitrequest(struct thread *td, struct ktr_request *req)
384 {
385 
386  ktrace_assert(td);
387 
388  sx_xlock(&ktrace_sx);
389  ktr_drain(td);
390  ktr_writerequest(td, req);
391  ktr_freerequest(req);
392  sx_xunlock(&ktrace_sx);
393  ktrace_exit(td);
394 }
395 
396 static void
397 ktr_freerequest(struct ktr_request *req)
398 {
399 
400  mtx_lock(&ktrace_mtx);
401  ktr_freerequest_locked(req);
402  mtx_unlock(&ktrace_mtx);
403 }
404 
405 static void
406 ktr_freerequest_locked(struct ktr_request *req)
407 {
408 
409  mtx_assert(&ktrace_mtx, MA_OWNED);
410  if (req->ktr_buffer != NULL)
411  free(req->ktr_buffer, M_KTRACE);
412  STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
413 }
414 
415 /*
416  * Disable tracing for a process and release all associated resources.
417  * The caller is responsible for releasing a reference on the returned
418  * vnode and credentials.
419  */
420 static void
421 ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp)
422 {
423  struct ktr_request *req;
424 
425  PROC_LOCK_ASSERT(p, MA_OWNED);
426  mtx_assert(&ktrace_mtx, MA_OWNED);
427  *uc = p->p_tracecred;
428  p->p_tracecred = NULL;
429  if (vp != NULL)
430  *vp = p->p_tracevp;
431  p->p_tracevp = NULL;
432  p->p_traceflag = 0;
433  while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) {
434  STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list);
435  ktr_freerequest_locked(req);
436  }
437 }
438 
439 void
440 ktrsyscall(code, narg, args)
441  int code, narg;
442  register_t args[];
443 {
444  struct ktr_request *req;
445  struct ktr_syscall *ktp;
446  size_t buflen;
447  char *buf = NULL;
448 
449  buflen = sizeof(register_t) * narg;
450  if (buflen > 0) {
451  buf = malloc(buflen, M_KTRACE, M_WAITOK);
452  bcopy(args, buf, buflen);
453  }
454  req = ktr_getrequest(KTR_SYSCALL);
455  if (req == NULL) {
456  if (buf != NULL)
457  free(buf, M_KTRACE);
458  return;
459  }
460  ktp = &req->ktr_data.ktr_syscall;
461  ktp->ktr_code = code;
462  ktp->ktr_narg = narg;
463  if (buflen > 0) {
464  req->ktr_header.ktr_len = buflen;
465  req->ktr_buffer = buf;
466  }
467  ktr_submitrequest(curthread, req);
468 }
469 
470 void
471 ktrsysret(code, error, retval)
472  int code, error;
473  register_t retval;
474 {
475  struct ktr_request *req;
476  struct ktr_sysret *ktp;
477 
478  req = ktr_getrequest(KTR_SYSRET);
479  if (req == NULL)
480  return;
481  ktp = &req->ktr_data.ktr_sysret;
482  ktp->ktr_code = code;
483  ktp->ktr_error = error;
484  ktp->ktr_retval = ((error == 0) ? retval: 0); /* what about val2 ? */
485  ktr_submitrequest(curthread, req);
486 }
487 
488 /*
489  * When a setuid process execs, disable tracing.
490  *
491  * XXX: We toss any pending asynchronous records.
492  */
493 void
494 ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp)
495 {
496 
497  PROC_LOCK_ASSERT(p, MA_OWNED);
498  mtx_lock(&ktrace_mtx);
499  ktr_freeproc(p, uc, vp);
500  mtx_unlock(&ktrace_mtx);
501 }
502 
503 /*
504  * When a process exits, drain per-process asynchronous trace records
505  * and disable tracing.
506  */
507 void
508 ktrprocexit(struct thread *td)
509 {
510  struct ktr_request *req;
511  struct proc *p;
512  struct ucred *cred;
513  struct vnode *vp;
514  int vfslocked;
515 
516  p = td->td_proc;
517  if (p->p_traceflag == 0)
518  return;
519 
520  ktrace_enter(td);
521  req = ktr_getrequest_entered(td, KTR_PROCDTOR);
522  if (req != NULL)
523  ktr_enqueuerequest(td, req);
524  sx_xlock(&ktrace_sx);
525  ktr_drain(td);
526  sx_xunlock(&ktrace_sx);
527  PROC_LOCK(p);
528  mtx_lock(&ktrace_mtx);
529  ktr_freeproc(p, &cred, &vp);
530  mtx_unlock(&ktrace_mtx);
531  PROC_UNLOCK(p);
532  if (vp != NULL) {
533  vfslocked = VFS_LOCK_GIANT(vp->v_mount);
534  vrele(vp);
535  VFS_UNLOCK_GIANT(vfslocked);
536  }
537  if (cred != NULL)
538  crfree(cred);
539  ktrace_exit(td);
540 }
541 
542 static void
543 ktrprocctor_entered(struct thread *td, struct proc *p)
544 {
545  struct ktr_proc_ctor *ktp;
546  struct ktr_request *req;
547  struct thread *td2;
548 
549  ktrace_assert(td);
550  td2 = FIRST_THREAD_IN_PROC(p);
551  req = ktr_getrequest_entered(td2, KTR_PROCCTOR);
552  if (req == NULL)
553  return;
554  ktp = &req->ktr_data.ktr_proc_ctor;
555  ktp->sv_flags = p->p_sysent->sv_flags;
556  ktr_enqueuerequest(td2, req);
557 }
558 
559 void
560 ktrprocctor(struct proc *p)
561 {
562  struct thread *td = curthread;
563 
564  if ((p->p_traceflag & KTRFAC_MASK) == 0)
565  return;
566 
567  ktrace_enter(td);
568  ktrprocctor_entered(td, p);
569  ktrace_exit(td);
570 }
571 
572 /*
573  * When a process forks, enable tracing in the new process if needed.
574  */
575 void
576 ktrprocfork(struct proc *p1, struct proc *p2)
577 {
578 
579  PROC_LOCK(p1);
580  mtx_lock(&ktrace_mtx);
581  KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
582  if (p1->p_traceflag & KTRFAC_INHERIT) {
583  p2->p_traceflag = p1->p_traceflag;
584  if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
585  VREF(p2->p_tracevp);
586  KASSERT(p1->p_tracecred != NULL,
587  ("ktrace vnode with no cred"));
588  p2->p_tracecred = crhold(p1->p_tracecred);
589  }
590  }
591  mtx_unlock(&ktrace_mtx);
592  PROC_UNLOCK(p1);
593 
594  ktrprocctor(p2);
595 }
596 
597 /*
598  * When a thread returns, drain any asynchronous records generated by the
599  * system call.
600  */
601 void
602 ktruserret(struct thread *td)
603 {
604 
605  ktrace_enter(td);
606  sx_xlock(&ktrace_sx);
607  ktr_drain(td);
608  sx_xunlock(&ktrace_sx);
609  ktrace_exit(td);
610 }
611 
612 void
613 ktrnamei(path)
614  char *path;
615 {
616  struct ktr_request *req;
617  int namelen;
618  char *buf = NULL;
619 
620  namelen = strlen(path);
621  if (namelen > 0) {
622  buf = malloc(namelen, M_KTRACE, M_WAITOK);
623  bcopy(path, buf, namelen);
624  }
625  req = ktr_getrequest(KTR_NAMEI);
626  if (req == NULL) {
627  if (buf != NULL)
628  free(buf, M_KTRACE);
629  return;
630  }
631  if (namelen > 0) {
632  req->ktr_header.ktr_len = namelen;
633  req->ktr_buffer = buf;
634  }
635  ktr_submitrequest(curthread, req);
636 }
637 
638 void
639 ktrsysctl(name, namelen)
640  int *name;
641  u_int namelen;
642 {
643  struct ktr_request *req;
644  u_int mib[CTL_MAXNAME + 2];
645  char *mibname;
646  size_t mibnamelen;
647  int error;
648 
649  /* Lookup name of mib. */
650  KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long"));
651  mib[0] = 0;
652  mib[1] = 1;
653  bcopy(name, mib + 2, namelen * sizeof(*name));
654  mibnamelen = 128;
655  mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK);
656  error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen,
657  NULL, 0, &mibnamelen, 0);
658  if (error) {
659  free(mibname, M_KTRACE);
660  return;
661  }
662  req = ktr_getrequest(KTR_SYSCTL);
663  if (req == NULL) {
664  free(mibname, M_KTRACE);
665  return;
666  }
667  req->ktr_header.ktr_len = mibnamelen;
668  req->ktr_buffer = mibname;
669  ktr_submitrequest(curthread, req);
670 }
671 
672 void
673 ktrgenio(fd, rw, uio, error)
674  int fd;
675  enum uio_rw rw;
676  struct uio *uio;
677  int error;
678 {
679  struct ktr_request *req;
680  struct ktr_genio *ktg;
681  int datalen;
682  char *buf;
683 
684  if (error) {
685  free(uio, M_IOV);
686  return;
687  }
688  uio->uio_offset = 0;
689  uio->uio_rw = UIO_WRITE;
690  datalen = MIN(uio->uio_resid, ktr_geniosize);
691  buf = malloc(datalen, M_KTRACE, M_WAITOK);
692  error = uiomove(buf, datalen, uio);
693  free(uio, M_IOV);
694  if (error) {
695  free(buf, M_KTRACE);
696  return;
697  }
698  req = ktr_getrequest(KTR_GENIO);
699  if (req == NULL) {
700  free(buf, M_KTRACE);
701  return;
702  }
703  ktg = &req->ktr_data.ktr_genio;
704  ktg->ktr_fd = fd;
705  ktg->ktr_rw = rw;
706  req->ktr_header.ktr_len = datalen;
707  req->ktr_buffer = buf;
708  ktr_submitrequest(curthread, req);
709 }
710 
711 void
712 ktrpsig(sig, action, mask, code)
713  int sig;
714  sig_t action;
715  sigset_t *mask;
716  int code;
717 {
718  struct thread *td = curthread;
719  struct ktr_request *req;
720  struct ktr_psig *kp;
721 
722  req = ktr_getrequest(KTR_PSIG);
723  if (req == NULL)
724  return;
725  kp = &req->ktr_data.ktr_psig;
726  kp->signo = (char)sig;
727  kp->action = action;
728  kp->mask = *mask;
729  kp->code = code;
730  ktr_enqueuerequest(td, req);
731  ktrace_exit(td);
732 }
733 
734 void
735 ktrcsw(out, user, wmesg)
736  int out, user;
737  const char *wmesg;
738 {
739  struct thread *td = curthread;
740  struct ktr_request *req;
741  struct ktr_csw *kc;
742 
743  req = ktr_getrequest(KTR_CSW);
744  if (req == NULL)
745  return;
746  kc = &req->ktr_data.ktr_csw;
747  kc->out = out;
748  kc->user = user;
749  if (wmesg != NULL)
750  strlcpy(kc->wmesg, wmesg, sizeof(kc->wmesg));
751  else
752  bzero(kc->wmesg, sizeof(kc->wmesg));
753  ktr_enqueuerequest(td, req);
754  ktrace_exit(td);
755 }
756 
757 void
758 ktrstruct(name, data, datalen)
759  const char *name;
760  void *data;
761  size_t datalen;
762 {
763  struct ktr_request *req;
764  char *buf = NULL;
765  size_t buflen;
766 
767  if (!data)
768  datalen = 0;
769  buflen = strlen(name) + 1 + datalen;
770  buf = malloc(buflen, M_KTRACE, M_WAITOK);
771  strcpy(buf, name);
772  bcopy(data, buf + strlen(name) + 1, datalen);
773  if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) {
774  free(buf, M_KTRACE);
775  return;
776  }
777  req->ktr_buffer = buf;
778  req->ktr_header.ktr_len = buflen;
779  ktr_submitrequest(curthread, req);
780 }
781 
782 void
783 ktrfault(vaddr, type)
784  vm_offset_t vaddr;
785  int type;
786 {
787  struct thread *td = curthread;
788  struct ktr_request *req;
789  struct ktr_fault *kf;
790 
791  req = ktr_getrequest(KTR_FAULT);
792  if (req == NULL)
793  return;
794  kf = &req->ktr_data.ktr_fault;
795  kf->vaddr = vaddr;
796  kf->type = type;
797  ktr_enqueuerequest(td, req);
798  ktrace_exit(td);
799 }
800 
801 void
802 ktrfaultend(result)
803  int result;
804 {
805  struct thread *td = curthread;
806  struct ktr_request *req;
807  struct ktr_faultend *kf;
808 
809  req = ktr_getrequest(KTR_FAULTEND);
810  if (req == NULL)
811  return;
812  kf = &req->ktr_data.ktr_faultend;
813  kf->result = result;
814  ktr_enqueuerequest(td, req);
815  ktrace_exit(td);
816 }
817 #endif /* KTRACE */
818 
819 /* Interface and common routines */
820 
/* Fallback declaration of the ktrace(2) arguments if sysproto.h is absent. */
#if !defined(_SYS_SYSPROTO_H_)
struct ktrace_args {
	char	*fname;		/* trace file path */
	int	ops;		/* operation and flags */
	int	facs;		/* trace facilities */
	int	pid;		/* target pid, or negated process group id */
};
#endif
829 /* ARGSUSED */
830 int
831 sys_ktrace(td, uap)
832  struct thread *td;
833  register struct ktrace_args *uap;
834 {
835 #ifdef KTRACE
836  register struct vnode *vp = NULL;
837  register struct proc *p;
838  struct pgrp *pg;
839  int facs = uap->facs & ~KTRFAC_ROOT;
840  int ops = KTROP(uap->ops);
841  int descend = uap->ops & KTRFLAG_DESCEND;
842  int nfound, ret = 0;
843  int flags, error = 0, vfslocked;
844  struct nameidata nd;
845  struct ucred *cred;
846 
847  /*
848  * Need something to (un)trace.
849  */
850  if (ops != KTROP_CLEARFILE && facs == 0)
851  return (EINVAL);
852 
853  ktrace_enter(td);
854  if (ops != KTROP_CLEAR) {
855  /*
856  * an operation which requires a file argument.
857  */
858  NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE,
859  uap->fname, td);
860  flags = FREAD | FWRITE | O_NOFOLLOW;
861  error = vn_open(&nd, &flags, 0, NULL);
862  if (error) {
863  ktrace_exit(td);
864  return (error);
865  }
866  vfslocked = NDHASGIANT(&nd);
867  NDFREE(&nd, NDF_ONLY_PNBUF);
868  vp = nd.ni_vp;
869  VOP_UNLOCK(vp, 0);
870  if (vp->v_type != VREG) {
871  (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
872  VFS_UNLOCK_GIANT(vfslocked);
873  ktrace_exit(td);
874  return (EACCES);
875  }
876  VFS_UNLOCK_GIANT(vfslocked);
877  }
878  /*
879  * Clear all uses of the tracefile.
880  */
881  if (ops == KTROP_CLEARFILE) {
882  int vrele_count;
883 
884  vrele_count = 0;
885  sx_slock(&allproc_lock);
886  FOREACH_PROC_IN_SYSTEM(p) {
887  PROC_LOCK(p);
888  if (p->p_tracevp == vp) {
889  if (ktrcanset(td, p)) {
890  mtx_lock(&ktrace_mtx);
891  ktr_freeproc(p, &cred, NULL);
892  mtx_unlock(&ktrace_mtx);
893  vrele_count++;
894  crfree(cred);
895  } else
896  error = EPERM;
897  }
898  PROC_UNLOCK(p);
899  }
900  sx_sunlock(&allproc_lock);
901  if (vrele_count > 0) {
902  vfslocked = VFS_LOCK_GIANT(vp->v_mount);
903  while (vrele_count-- > 0)
904  vrele(vp);
905  VFS_UNLOCK_GIANT(vfslocked);
906  }
907  goto done;
908  }
909  /*
910  * do it
911  */
912  sx_slock(&proctree_lock);
913  if (uap->pid < 0) {
914  /*
915  * by process group
916  */
917  pg = pgfind(-uap->pid);
918  if (pg == NULL) {
919  sx_sunlock(&proctree_lock);
920  error = ESRCH;
921  goto done;
922  }
923  /*
924  * ktrops() may call vrele(). Lock pg_members
925  * by the proctree_lock rather than pg_mtx.
926  */
927  PGRP_UNLOCK(pg);
928  nfound = 0;
929  LIST_FOREACH(p, &pg->pg_members, p_pglist) {
930  PROC_LOCK(p);
931  if (p->p_state == PRS_NEW ||
932  p_cansee(td, p) != 0) {
933  PROC_UNLOCK(p);
934  continue;
935  }
936  nfound++;
937  if (descend)
938  ret |= ktrsetchildren(td, p, ops, facs, vp);
939  else
940  ret |= ktrops(td, p, ops, facs, vp);
941  }
942  if (nfound == 0) {
943  sx_sunlock(&proctree_lock);
944  error = ESRCH;
945  goto done;
946  }
947  } else {
948  /*
949  * by pid
950  */
951  p = pfind(uap->pid);
952  if (p == NULL)
953  error = ESRCH;
954  else
955  error = p_cansee(td, p);
956  if (error) {
957  if (p != NULL)
958  PROC_UNLOCK(p);
959  sx_sunlock(&proctree_lock);
960  goto done;
961  }
962  if (descend)
963  ret |= ktrsetchildren(td, p, ops, facs, vp);
964  else
965  ret |= ktrops(td, p, ops, facs, vp);
966  }
967  sx_sunlock(&proctree_lock);
968  if (!ret)
969  error = EPERM;
970 done:
971  if (vp != NULL) {
972  vfslocked = VFS_LOCK_GIANT(vp->v_mount);
973  (void) vn_close(vp, FWRITE, td->td_ucred, td);
974  VFS_UNLOCK_GIANT(vfslocked);
975  }
976  ktrace_exit(td);
977  return (error);
978 #else /* !KTRACE */
979  return (ENOSYS);
980 #endif /* KTRACE */
981 }
982 
983 /* ARGSUSED */
984 int
985 sys_utrace(td, uap)
986  struct thread *td;
987  register struct utrace_args *uap;
988 {
989 
990 #ifdef KTRACE
991  struct ktr_request *req;
992  void *cp;
993  int error;
994 
995  if (!KTRPOINT(td, KTR_USER))
996  return (0);
997  if (uap->len > KTR_USER_MAXLEN)
998  return (EINVAL);
999  cp = malloc(uap->len, M_KTRACE, M_WAITOK);
1000  error = copyin(uap->addr, cp, uap->len);
1001  if (error) {
1002  free(cp, M_KTRACE);
1003  return (error);
1004  }
1005  req = ktr_getrequest(KTR_USER);
1006  if (req == NULL) {
1007  free(cp, M_KTRACE);
1008  return (ENOMEM);
1009  }
1010  req->ktr_buffer = cp;
1011  req->ktr_header.ktr_len = uap->len;
1012  ktr_submitrequest(td, req);
1013  return (0);
1014 #else /* !KTRACE */
1015  return (ENOSYS);
1016 #endif /* KTRACE */
1017 }
1018 
1019 #ifdef KTRACE
1020 static int
1021 ktrops(td, p, ops, facs, vp)
1022  struct thread *td;
1023  struct proc *p;
1024  int ops, facs;
1025  struct vnode *vp;
1026 {
1027  struct vnode *tracevp = NULL;
1028  struct ucred *tracecred = NULL;
1029 
1030  PROC_LOCK_ASSERT(p, MA_OWNED);
1031  if (!ktrcanset(td, p)) {
1032  PROC_UNLOCK(p);
1033  return (0);
1034  }
1035  if (p->p_flag & P_WEXIT) {
1036  /* If the process is exiting, just ignore it. */
1037  PROC_UNLOCK(p);
1038  return (1);
1039  }
1040  mtx_lock(&ktrace_mtx);
1041  if (ops == KTROP_SET) {
1042  if (p->p_tracevp != vp) {
1043  /*
1044  * if trace file already in use, relinquish below
1045  */
1046  tracevp = p->p_tracevp;
1047  VREF(vp);
1048  p->p_tracevp = vp;
1049  }
1050  if (p->p_tracecred != td->td_ucred) {
1051  tracecred = p->p_tracecred;
1052  p->p_tracecred = crhold(td->td_ucred);
1053  }
1054  p->p_traceflag |= facs;
1055  if (priv_check(td, PRIV_KTRACE) == 0)
1056  p->p_traceflag |= KTRFAC_ROOT;
1057  } else {
1058  /* KTROP_CLEAR */
1059  if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0)
1060  /* no more tracing */
1061  ktr_freeproc(p, &tracecred, &tracevp);
1062  }
1063  mtx_unlock(&ktrace_mtx);
1064  if ((p->p_traceflag & KTRFAC_MASK) != 0)
1065  ktrprocctor_entered(td, p);
1066  PROC_UNLOCK(p);
1067  if (tracevp != NULL) {
1068  int vfslocked;
1069 
1070  vfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
1071  vrele(tracevp);
1072  VFS_UNLOCK_GIANT(vfslocked);
1073  }
1074  if (tracecred != NULL)
1075  crfree(tracecred);
1076 
1077  return (1);
1078 }
1079 
1080 static int
1081 ktrsetchildren(td, top, ops, facs, vp)
1082  struct thread *td;
1083  struct proc *top;
1084  int ops, facs;
1085  struct vnode *vp;
1086 {
1087  register struct proc *p;
1088  register int ret = 0;
1089 
1090  p = top;
1091  PROC_LOCK_ASSERT(p, MA_OWNED);
1092  sx_assert(&proctree_lock, SX_LOCKED);
1093  for (;;) {
1094  ret |= ktrops(td, p, ops, facs, vp);
1095  /*
1096  * If this process has children, descend to them next,
1097  * otherwise do any siblings, and if done with this level,
1098  * follow back up the tree (but not past top).
1099  */
1100  if (!LIST_EMPTY(&p->p_children))
1101  p = LIST_FIRST(&p->p_children);
1102  else for (;;) {
1103  if (p == top)
1104  return (ret);
1105  if (LIST_NEXT(p, p_sibling)) {
1106  p = LIST_NEXT(p, p_sibling);
1107  break;
1108  }
1109  p = p->p_pptr;
1110  }
1111  PROC_LOCK(p);
1112  }
1113  /*NOTREACHED*/
1114 }
1115 
1116 static void
1117 ktr_writerequest(struct thread *td, struct ktr_request *req)
1118 {
1119  struct ktr_header *kth;
1120  struct vnode *vp;
1121  struct proc *p;
1122  struct ucred *cred;
1123  struct uio auio;
1124  struct iovec aiov[3];
1125  struct mount *mp;
1126  int datalen, buflen, vrele_count;
1127  int error, vfslocked;
1128 
1129  /*
1130  * We hold the vnode and credential for use in I/O in case ktrace is
1131  * disabled on the process as we write out the request.
1132  *
1133  * XXXRW: This is not ideal: we could end up performing a write after
1134  * the vnode has been closed.
1135  */
1136  mtx_lock(&ktrace_mtx);
1137  vp = td->td_proc->p_tracevp;
1138  cred = td->td_proc->p_tracecred;
1139 
1140  /*
1141  * If vp is NULL, the vp has been cleared out from under this
1142  * request, so just drop it. Make sure the credential and vnode are
1143  * in sync: we should have both or neither.
1144  */
1145  if (vp == NULL) {
1146  KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
1147  mtx_unlock(&ktrace_mtx);
1148  return;
1149  }
1150  VREF(vp);
1151  KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));
1152  crhold(cred);
1153  mtx_unlock(&ktrace_mtx);
1154 
1155  kth = &req->ktr_header;
1156  KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) <
1157  sizeof(data_lengths) / sizeof(data_lengths[0]),
1158  ("data_lengths array overflow"));
1159  datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
1160  buflen = kth->ktr_len;
1161  auio.uio_iov = &aiov[0];
1162  auio.uio_offset = 0;
1163  auio.uio_segflg = UIO_SYSSPACE;
1164  auio.uio_rw = UIO_WRITE;
1165  aiov[0].iov_base = (caddr_t)kth;
1166  aiov[0].iov_len = sizeof(struct ktr_header);
1167  auio.uio_resid = sizeof(struct ktr_header);
1168  auio.uio_iovcnt = 1;
1169  auio.uio_td = td;
1170  if (datalen != 0) {
1171  aiov[1].iov_base = (caddr_t)&req->ktr_data;
1172  aiov[1].iov_len = datalen;
1173  auio.uio_resid += datalen;
1174  auio.uio_iovcnt++;
1175  kth->ktr_len += datalen;
1176  }
1177  if (buflen != 0) {
1178  KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write"));
1179  aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer;
1180  aiov[auio.uio_iovcnt].iov_len = buflen;
1181  auio.uio_resid += buflen;
1182  auio.uio_iovcnt++;
1183  }
1184 
1185  vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1186  vn_start_write(vp, &mp, V_WAIT);
1187  vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1188 #ifdef MAC
1189  error = mac_vnode_check_write(cred, NOCRED, vp);
1190  if (error == 0)
1191 #endif
1192  error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
1193  VOP_UNLOCK(vp, 0);
1194  vn_finished_write(mp);
1195  crfree(cred);
1196  if (!error) {
1197  vrele(vp);
1198  VFS_UNLOCK_GIANT(vfslocked);
1199  return;
1200  }
1201  VFS_UNLOCK_GIANT(vfslocked);
1202 
1203  /*
1204  * If error encountered, give up tracing on this vnode. We defer
1205  * all the vrele()'s on the vnode until after we are finished walking
1206  * the various lists to avoid needlessly holding locks.
1207  * NB: at this point we still hold the vnode reference that must
1208  * not go away as we need the valid vnode to compare with. Thus let
1209  * vrele_count start at 1 and the reference will be freed
1210  * by the loop at the end after our last use of vp.
1211  */
1212  log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
1213  error);
1214  vrele_count = 1;
1215  /*
1216  * First, clear this vnode from being used by any processes in the
1217  * system.
1218  * XXX - If one process gets an EPERM writing to the vnode, should
1219  * we really do this? Other processes might have suitable
1220  * credentials for the operation.
1221  */
1222  cred = NULL;
1223  sx_slock(&allproc_lock);
1224  FOREACH_PROC_IN_SYSTEM(p) {
1225  PROC_LOCK(p);
1226  if (p->p_tracevp == vp) {
1227  mtx_lock(&ktrace_mtx);
1228  ktr_freeproc(p, &cred, NULL);
1229  mtx_unlock(&ktrace_mtx);
1230  vrele_count++;
1231  }
1232  PROC_UNLOCK(p);
1233  if (cred != NULL) {
1234  crfree(cred);
1235  cred = NULL;
1236  }
1237  }
1238  sx_sunlock(&allproc_lock);
1239 
1240  vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1241  while (vrele_count-- > 0)
1242  vrele(vp);
1243  VFS_UNLOCK_GIANT(vfslocked);
1244 }
1245 
1246 /*
1247  * Return true if caller has permission to set the ktracing state
1248  * of target. Essentially, the target can't possess any
1249  * more permissions than the caller. KTRFAC_ROOT signifies that
1250  * root previously set the tracing status on the target process, and
1251  * so, only root may further change it.
1252  */
1253 static int
1254 ktrcanset(td, targetp)
1255  struct thread *td;
1256  struct proc *targetp;
1257 {
1258 
1259  PROC_LOCK_ASSERT(targetp, MA_OWNED);
1260  if (targetp->p_traceflag & KTRFAC_ROOT &&
1261  priv_check(td, PRIV_KTRACE))
1262  return (0);
1263 
1264  if (p_candebug(td, targetp) != 0)
1265  return (0);
1266 
1267  return (1);
1268 }
1269 
1270 #endif /* KTRACE */
int fd
Definition: kern_exec.c:199
struct buf * buf
Definition: vfs_bio.c:97
char * path
TUNABLE_INT("kern.eventtimer.singlemul",&singlemul)
void NDFREE(struct nameidata *ndp, const u_int flags)
Definition: vfs_lookup.c:1091
static SYSCTL_NODE(_debug, OID_AUTO, cpufreq, CTLFLAG_RD, NULL,"cpufreq debugging")
int p_candebug(struct thread *td, struct proc *p)
Definition: kern_prot.c:1627
void * malloc(unsigned long size, struct malloc_type *mtp, int flags)
Definition: kern_malloc.c:454
CTASSERT(MAXSHELLCMDLEN >=MAXINTERP+3)
void vn_finished_write(struct mount *mp)
Definition: vfs_vnops.c:1599
SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL)
int sys_utrace(struct thread *td, struct utrace_args *uap)
Definition: kern_ktrace.c:985
const char * name
Definition: kern_fail.c:97
SYSCTL_UINT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RW,&idletick, 0,"Run periodic events when idle")
int * type
Definition: cpufreq_if.m:98
int sys_ktrace(struct thread *td, struct ktrace_args *uap)
Definition: kern_ktrace.c:831
static STAILQ_HEAD(cn_device)
Definition: kern_cons.c:82
__FBSDID("$BSDSUniX$")
int priv_check(struct thread *td, int priv)
Definition: kern_priv.c:170
struct proc * pfind(pid_t pid)
Definition: kern_proc.c:304
struct sx allproc_lock
Definition: kern_proc.c:136
char * fname
Definition: kern_ktrace.c:823
static int dummy
int kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags)
Definition: kern_sysctl.c:1232
void crfree(struct ucred *cr)
Definition: kern_prot.c:1835
int mask
Definition: subr_acl_nfs4.c:67
static MALLOC_DEFINE(M_KTRACE,"KTRACE","KTRACE")
void log(int level, const char *fmt,...)
Definition: subr_prf.c:289
struct pgrp * pgfind(pid_t pgid)
Definition: kern_proc.c:342
SYSCTL_PROC(_kern, OID_AUTO, acct_chkfreq, CTLTYPE_INT|CTLFLAG_RW,&acctchkfreq, 0, sysctl_acct_chkfreq,"I","frequency for checking the free space")
struct ucred * crhold(struct ucred *cr)
Definition: kern_prot.c:1824
int uiomove(void *cp, int n, struct uio *uio)
Definition: subr_uio.c:202
void free(void *addr, struct malloc_type *mtp)
Definition: kern_malloc.c:554
int vn_close(struct vnode *vp, int flags, struct ucred *file_cred, struct thread *td)
Definition: vfs_vnops.c:303
int printf(const char *fmt,...)
Definition: subr_prf.c:367
linker_file_t * result
Definition: linker_if.m:136
void mtx_init(struct mtx *m, const char *name, const char *type, int opts)
Definition: kern_mutex.c:837
void vrele(struct vnode *vp)
Definition: vfs_subr.c:2416
int vn_start_write(struct vnode *vp, struct mount **mpp, int flags)
Definition: vfs_vnops.c:1491
struct sx proctree_lock
Definition: kern_proc.c:137
void microtime(struct timeval *tvp)
Definition: kern_tc.c:220
int vn_open(struct nameidata *ndp, int *flagp, int cmode, struct file *fp)
Definition: vfs_vnops.c:106
FEATURE(kdtrace_hooks,"Kernel DTrace hooks which are required to load DTrace kernel modules")
int p_cansee(struct thread *td, struct proc *p)
Definition: kern_prot.c:1426