FreeBSD kernel kern code
imgact_elf.c
Go to the documentation of this file.
1 /*-
2  * Copyright (c) 2000 David O'Brien
3  * Copyright (c) 1995-1996 Søren Schmidt
4  * Copyright (c) 1996 Peter Wemm
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer
12  * in this position and unchanged.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in the
15  * documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote products
17  * derived from this software without specific prior written permission
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$BSDSUniX$");
33 
34 #include "opt_capsicum.h"
35 #include "opt_compat.h"
36 #include "opt_core.h"
37 
38 #include <sys/param.h>
39 #include <sys/capability.h>
40 #include <sys/exec.h>
41 #include <sys/fcntl.h>
42 #include <sys/filedesc.h>
43 #include <sys/imgact.h>
44 #include <sys/imgact_elf.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mount.h>
49 #include <sys/mutex.h>
50 #include <sys/mman.h>
51 #include <sys/namei.h>
52 #include <sys/pioctl.h>
53 #include <sys/proc.h>
54 #include <sys/procfs.h>
55 #include <sys/racct.h>
56 #include <sys/resourcevar.h>
57 #include <sys/sbuf.h>
58 #include <sys/sf_buf.h>
59 #include <sys/smp.h>
60 #include <sys/systm.h>
61 #include <sys/signalvar.h>
62 #include <sys/stat.h>
63 #include <sys/sx.h>
64 #include <sys/syscall.h>
65 #include <sys/sysctl.h>
66 #include <sys/sysent.h>
67 #include <sys/vnode.h>
68 #include <sys/syslog.h>
69 #include <sys/eventhandler.h>
70 #include <sys/user.h>
71 
72 #include <net/zlib.h>
73 
74 #include <vm/vm.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_param.h>
77 #include <vm/pmap.h>
78 #include <vm/vm_map.h>
79 #include <vm/vm_object.h>
80 #include <vm/vm_extern.h>
81 
82 #include <machine/elf.h>
83 #include <machine/md_var.h>
84 
/*
 * Alignment, in bytes, applied to a note's name field when locating the
 * descriptor that follows it (see the roundup2() calls in the
 * *_trans_osrel() helpers).
 */
85 #define ELF_NOTE_ROUNDSIZE 4
/*
 * Index into e_ident[] where the legacy string brand starts; it is compared
 * against a brand's compat_3_brand in get_brandinfo().
 */
86 #define OLD_EI_BRAND 8
87 
88 static int __elfN(check_header)(const Elf_Ehdr *hdr);
89 static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
90  const char *interp, int interp_name_len, int32_t *osrel);
91 static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
92  u_long *entry, size_t pagesize);
93 static int __elfN(load_section)(struct vmspace *vmspace, vm_object_t object,
94  vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
95  vm_prot_t prot, size_t pagesize);
96 static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);
97 static boolean_t __elfN(bsdsunix_trans_osrel)(const Elf_Note *note,
98  int32_t *osrel);
99 static boolean_t kbsdsunix_trans_osrel(const Elf_Note *note, int32_t *osrel);
100 static boolean_t __elfN(check_note)(struct image_params *imgp,
101  Elf_Brandnote *checknote, int32_t *osrel);
102 static vm_prot_t __elfN(trans_prot)(Elf_Word);
103 static Elf_Word __elfN(untrans_prot)(vm_prot_t);
104 
105 SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0,
106  "");
107 
108 #ifdef COMPRESS_USER_CORES
109 static int compress_core(gzFile, char *, char *, unsigned int,
110  struct thread * td);
111 #endif
112 #define CORE_BUF_SIZE (16 * 1024)
113 
/*
 * Brand number tried by get_brandinfo() as the last resort, after note,
 * header-brand and interpreter matching have all failed.  Exported as the
 * fallback_brand sysctl under kern.elf<wordsize> and as a tunable of the
 * same name.
 */
114 int __elfN(fallback_brand) = -1;
115 SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
116  fallback_brand, CTLFLAG_RW, &__elfN(fallback_brand), 0,
117  __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");
118 TUNABLE_INT("kern.elf" __XSTRING(__ELF_WORD_SIZE) ".fallback_brand",
119  &__elfN(fallback_brand));
120 
/*
 * Read/write knob under the debug sysctl tree.  Its consumer is not in this
 * part of the file -- NOTE(review): presumably selects the legacy core dump
 * layout; confirm against the core dump code.
 */
121 static int elf_legacy_coredump = 0;
122 SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW,
123  &elf_legacy_coredump, 0, "");
124 
/*
 * When non-zero, the image activator takes the stack protection from the
 * image's PT_GNU_STACK program header.  Defaults to 1 on amd64 and
 * powerpc64 and to 0 elsewhere.
 */
125 int __elfN(nxstack) =
126 #if defined(__amd64__) || defined(__powerpc64__) /* both 64 and 32 bit */
127  1;
128 #else
129  0;
130 #endif
131 SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
132  nxstack, CTLFLAG_RW, &__elfN(nxstack), 0,
133  __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable non-executable stack");
134 
/*
 * Only built for the 32-bit ELF flavour on amd64 and ia64.  The variable is
 * not referenced in this part of the file; see the sysctl description for
 * its intent.
 */
135 #if __ELF_WORD_SIZE == 32
136 #if defined(__amd64__) || defined(__ia64__)
137 int i386_read_exec = 0;
138 SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0,
139  "enable execution from readable segments");
140 #endif
141 #endif
142 
/*
 * Table of registered brands.  A NULL slot is free; slots are filled by
 * insert_brand_entry() and cleared by remove_brand_entry().
 */
143 static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
144 
/*
 * Rounding helpers parameterised by page size.
 *
 * trunc_page_ps(va, ps)  rounds va down to a multiple of ps.
 * round_page_ps(va, ps)  rounds va up to a multiple of ps.
 * aligned(a, t)          is true when address a is a multiple of sizeof(t).
 *
 * ps must be a power of two.  Every macro parameter is parenthesised so
 * that expression arguments (e.g. "1UL << shift") expand with the intended
 * precedence.
 */
#define	trunc_page_ps(va, ps)	((va) & ~((ps) - 1))
#define	round_page_ps(va, ps)	(((va) + ((ps) - 1)) & ~((ps) - 1))
#define	aligned(a, t)	(trunc_page_ps((u_long)(a), sizeof(t)) == (u_long)(a))
148 
149 static const char BSDSUNIX_ABI_VENDOR[] = "BSDSUniX";
150 
151 Elf_Brandnote __elfN(bsdsunix_brandnote) = {
152  .hdr.n_namesz = sizeof(BSDSUNIX_ABI_VENDOR),
153  .hdr.n_descsz = sizeof(int32_t),
154  .hdr.n_type = 1,
155  .vendor = BSDSUNIX_ABI_VENDOR,
156  .flags = BN_TRANSLATE_OSREL,
157  .trans_osrel = __elfN(bsdsunix_trans_osrel)
158 };
159 
160 static boolean_t
161 __elfN(bsdsunix_trans_osrel)(const Elf_Note *note, int32_t *osrel)
162 {
163  uintptr_t p;
164 
165  p = (uintptr_t)(note + 1);
166  p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE);
167  *osrel = *(const int32_t *)(p);
168 
169  return (TRUE);
170 }
171 
172 static const char GNU_ABI_VENDOR[] = "GNU";
173 static int GNU_KBSDSUNIX_ABI_DESC = 3;
174 
175 Elf_Brandnote __elfN(kbsdsunix_brandnote) = {
176  .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
177  .hdr.n_descsz = 16, /* XXX at least 16 */
178  .hdr.n_type = 1,
179  .vendor = GNU_ABI_VENDOR,
180  .flags = BN_TRANSLATE_OSREL,
181  .trans_osrel = kbsdsunix_trans_osrel
182 };
183 
184 static boolean_t
185 kbsdsunix_trans_osrel(const Elf_Note *note, int32_t *osrel)
186 {
187  const Elf32_Word *desc;
188  uintptr_t p;
189 
190  p = (uintptr_t)(note + 1);
191  p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE);
192 
193  desc = (const Elf32_Word *)p;
194  if (desc[0] != GNU_KBSDSUNIX_ABI_DESC)
195  return (FALSE);
196 
197  /*
198  * Debian GNU/kFreeBSD embed the earliest compatible kernel version
199  * (__BSDSUniX_version: <major><two digit minor>Rxx) in the LSB way.
200  */
201  *osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3];
202 
203  return (TRUE);
204 }
205 
206 int
207 __elfN(insert_brand_entry)(Elf_Brandinfo *entry)
208 {
209  int i;
210 
211  for (i = 0; i < MAX_BRANDS; i++) {
212  if (elf_brand_list[i] == NULL) {
213  elf_brand_list[i] = entry;
214  break;
215  }
216  }
217  if (i == MAX_BRANDS) {
218  printf("WARNING: %s: could not insert brandinfo entry: %p\n",
219  __func__, entry);
220  return (-1);
221  }
222  return (0);
223 }
224 
225 int
226 __elfN(remove_brand_entry)(Elf_Brandinfo *entry)
227 {
228  int i;
229 
230  for (i = 0; i < MAX_BRANDS; i++) {
231  if (elf_brand_list[i] == entry) {
232  elf_brand_list[i] = NULL;
233  break;
234  }
235  }
236  if (i == MAX_BRANDS)
237  return (-1);
238  return (0);
239 }
240 
241 int
242 __elfN(brand_inuse)(Elf_Brandinfo *entry)
243 {
244  struct proc *p;
245  int rval = FALSE;
246 
247  sx_slock(&allproc_lock);
248  FOREACH_PROC_IN_SYSTEM(p) {
249  if (p->p_sysent == entry->sysvec) {
250  rval = TRUE;
251  break;
252  }
253  }
254  sx_sunlock(&allproc_lock);
255 
256  return (rval);
257 }
258 
259 static Elf_Brandinfo *
260 __elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
261  int interp_name_len, int32_t *osrel)
262 {
263  const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
264  Elf_Brandinfo *bi;
265  boolean_t ret;
266  int i;
267 
268  /*
269  * We support four types of branding -- (1) the ELF EI_OSABI field
270  * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
271  * branding w/in the ELF header, (3) path of the `interp_path'
272  * field, and (4) the ".note.ABI-tag" ELF section.
273  */
274 
275  /* Look for an ".note.ABI-tag" ELF section */
276  for (i = 0; i < MAX_BRANDS; i++) {
277  bi = elf_brand_list[i];
278  if (bi == NULL)
279  continue;
280  if (hdr->e_machine == bi->machine && (bi->flags &
281  (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) {
282  ret = __elfN(check_note)(imgp, bi->brand_note, osrel);
283  if (ret)
284  return (bi);
285  }
286  }
287 
288  /* If the executable has a brand, search for it in the brand list. */
289  for (i = 0; i < MAX_BRANDS; i++) {
290  bi = elf_brand_list[i];
291  if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
292  continue;
293  if (hdr->e_machine == bi->machine &&
294  (hdr->e_ident[EI_OSABI] == bi->brand ||
295  strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
296  bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0))
297  return (bi);
298  }
299 
300  /* Lacking a known brand, search for a recognized interpreter. */
301  if (interp != NULL) {
302  for (i = 0; i < MAX_BRANDS; i++) {
303  bi = elf_brand_list[i];
304  if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
305  continue;
306  if (hdr->e_machine == bi->machine &&
307  /* ELF image p_filesz includes terminating zero */
308  strlen(bi->interp_path) + 1 == interp_name_len &&
309  strncmp(interp, bi->interp_path, interp_name_len)
310  == 0)
311  return (bi);
312  }
313  }
314 
315  /* Lacking a recognized interpreter, try the default brand */
316  for (i = 0; i < MAX_BRANDS; i++) {
317  bi = elf_brand_list[i];
318  if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
319  continue;
320  if (hdr->e_machine == bi->machine &&
321  __elfN(fallback_brand) == bi->brand)
322  return (bi);
323  }
324  return (NULL);
325 }
326 
327 static int
328 __elfN(check_header)(const Elf_Ehdr *hdr)
329 {
330  Elf_Brandinfo *bi;
331  int i;
332 
333  if (!IS_ELF(*hdr) ||
334  hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
335  hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
336  hdr->e_ident[EI_VERSION] != EV_CURRENT ||
337  hdr->e_phentsize != sizeof(Elf_Phdr) ||
338  hdr->e_version != ELF_TARG_VER)
339  return (ENOEXEC);
340 
341  /*
342  * Make sure we have at least one brand for this machine.
343  */
344 
345  for (i = 0; i < MAX_BRANDS; i++) {
346  bi = elf_brand_list[i];
347  if (bi != NULL && bi->machine == hdr->e_machine)
348  break;
349  }
350  if (i == MAX_BRANDS)
351  return (ENOEXEC);
352 
353  return (0);
354 }
355 
356 static int
357 __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
358  vm_offset_t start, vm_offset_t end, vm_prot_t prot)
359 {
360  struct sf_buf *sf;
361  int error;
362  vm_offset_t off;
363 
364  /*
365  * Create the page if it doesn't exist yet. Ignore errors.
366  */
367  vm_map_lock(map);
368  vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end),
369  VM_PROT_ALL, VM_PROT_ALL, 0);
370  vm_map_unlock(map);
371 
372  /*
373  * Find the page from the underlying object.
374  */
375  if (object) {
376  sf = vm_imgact_map_page(object, offset);
377  if (sf == NULL)
378  return (KERN_FAILURE);
379  off = offset - trunc_page(offset);
380  error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
381  end - start);
382  vm_imgact_unmap_page(sf);
383  if (error) {
384  return (KERN_FAILURE);
385  }
386  }
387 
388  return (KERN_SUCCESS);
389 }
390 
391 static int
392 __elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
393  vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow)
394 {
395  struct sf_buf *sf;
396  vm_offset_t off;
397  vm_size_t sz;
398  int error, rv;
399 
400  if (start != trunc_page(start)) {
401  rv = __elfN(map_partial)(map, object, offset, start,
402  round_page(start), prot);
403  if (rv)
404  return (rv);
405  offset += round_page(start) - start;
406  start = round_page(start);
407  }
408  if (end != round_page(end)) {
409  rv = __elfN(map_partial)(map, object, offset +
410  trunc_page(end) - start, trunc_page(end), end, prot);
411  if (rv)
412  return (rv);
413  end = trunc_page(end);
414  }
415  if (end > start) {
416  if (offset & PAGE_MASK) {
417  /*
418  * The mapping is not page aligned. This means we have
419  * to copy the data. Sigh.
420  */
421  rv = vm_map_find(map, NULL, 0, &start, end - start,
422  FALSE, prot | VM_PROT_WRITE, VM_PROT_ALL, 0);
423  if (rv)
424  return (rv);
425  if (object == NULL)
426  return (KERN_SUCCESS);
427  for (; start < end; start += sz) {
428  sf = vm_imgact_map_page(object, offset);
429  if (sf == NULL)
430  return (KERN_FAILURE);
431  off = offset - trunc_page(offset);
432  sz = end - start;
433  if (sz > PAGE_SIZE - off)
434  sz = PAGE_SIZE - off;
435  error = copyout((caddr_t)sf_buf_kva(sf) + off,
436  (caddr_t)start, sz);
437  vm_imgact_unmap_page(sf);
438  if (error) {
439  return (KERN_FAILURE);
440  }
441  offset += sz;
442  }
443  rv = KERN_SUCCESS;
444  } else {
445  vm_object_reference(object);
446  vm_map_lock(map);
447  rv = vm_map_insert(map, object, offset, start, end,
448  prot, VM_PROT_ALL, cow);
449  vm_map_unlock(map);
450  if (rv != KERN_SUCCESS)
451  vm_object_deallocate(object);
452  }
453  return (rv);
454  } else {
455  return (KERN_SUCCESS);
456  }
457 }
458 
459 static int
460 __elfN(load_section)(struct vmspace *vmspace,
461  vm_object_t object, vm_offset_t offset,
462  caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
463  size_t pagesize)
464 {
465  struct sf_buf *sf;
466  size_t map_len;
467  vm_offset_t map_addr;
468  int error, rv, cow;
469  size_t copy_len;
470  vm_offset_t file_addr;
471 
472  /*
473  * It's necessary to fail if the filsz + offset taken from the
474  * header is greater than the actual file pager object's size.
475  * If we were to allow this, then the vm_map_find() below would
476  * walk right off the end of the file object and into the ether.
477  *
478  * While I'm here, might as well check for something else that
479  * is invalid: filsz cannot be greater than memsz.
480  */
481  if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size ||
482  filsz > memsz) {
483  uprintf("elf_load_section: truncated ELF file\n");
484  return (ENOEXEC);
485  }
486 
487  map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize);
488  file_addr = trunc_page_ps(offset, pagesize);
489 
490  /*
491  * We have two choices. We can either clear the data in the last page
492  * of an oversized mapping, or we can start the anon mapping a page
493  * early and copy the initialized data into that first page. We
494  * choose the second..
495  */
496  if (memsz > filsz)
497  map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr;
498  else
499  map_len = round_page_ps(offset + filsz, pagesize) - file_addr;
500 
501  if (map_len != 0) {
502  /* cow flags: don't dump readonly sections in core */
503  cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
504  (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);
505 
506  rv = __elfN(map_insert)(&vmspace->vm_map,
507  object,
508  file_addr, /* file offset */
509  map_addr, /* virtual start */
510  map_addr + map_len,/* virtual end */
511  prot,
512  cow);
513  if (rv != KERN_SUCCESS)
514  return (EINVAL);
515 
516  /* we can stop now if we've covered it all */
517  if (memsz == filsz) {
518  return (0);
519  }
520  }
521 
522 
523  /*
524  * We have to get the remaining bit of the file into the first part
525  * of the oversized map segment. This is normally because the .data
526  * segment in the file is extended to provide bss. It's a neat idea
527  * to try and save a page, but it's a pain in the behind to implement.
528  */
529  copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize);
530  map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize);
531  map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) -
532  map_addr;
533 
534  /* This had damn well better be true! */
535  if (map_len != 0) {
536  rv = __elfN(map_insert)(&vmspace->vm_map, NULL, 0, map_addr,
537  map_addr + map_len, VM_PROT_ALL, 0);
538  if (rv != KERN_SUCCESS) {
539  return (EINVAL);
540  }
541  }
542 
543  if (copy_len != 0) {
544  vm_offset_t off;
545 
546  sf = vm_imgact_map_page(object, offset + filsz);
547  if (sf == NULL)
548  return (EIO);
549 
550  /* send the page fragment to user space */
551  off = trunc_page_ps(offset + filsz, pagesize) -
552  trunc_page(offset + filsz);
553  error = copyout((caddr_t)sf_buf_kva(sf) + off,
554  (caddr_t)map_addr, copy_len);
555  vm_imgact_unmap_page(sf);
556  if (error) {
557  return (error);
558  }
559  }
560 
561  /*
562  * set it to the specified protection.
563  * XXX had better undo the damage from pasting over the cracks here!
564  */
565  vm_map_protect(&vmspace->vm_map, trunc_page(map_addr),
566  round_page(map_addr + map_len), prot, FALSE);
567 
568  return (0);
569 }
570 
571 /*
572  * Load the file "file" into memory. It may be either a shared object
573  * or an executable.
574  *
575  * The "addr" reference parameter is in/out. On entry, it specifies
576  * the address where a shared object should be loaded. If the file is
577  * an executable, this value is ignored. On exit, "addr" specifies
578  * where the file was actually loaded.
579  *
580  * The "entry" reference parameter is out only. On exit, it specifies
581  * the entry point for the loaded file.
582  */
583 static int
584 __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
585  u_long *entry, size_t pagesize)
586 {
587  struct {
588  struct nameidata nd;
589  struct vattr attr;
590  struct image_params image_params;
591  } *tempdata;
592  const Elf_Ehdr *hdr = NULL;
593  const Elf_Phdr *phdr = NULL;
594  struct nameidata *nd;
595  struct vmspace *vmspace = p->p_vmspace;
596  struct vattr *attr;
597  struct image_params *imgp;
598  vm_prot_t prot;
599  u_long rbase;
600  u_long base_addr = 0;
601  int vfslocked, error, i, numsegs;
602 
603 #ifdef CAPABILITY_MODE
604  /*
605  * XXXJA: This check can go away once we are sufficiently confident
606  * that the checks in namei() are correct.
607  */
608  if (IN_CAPABILITY_MODE(curthread))
609  return (ECAPMODE);
610 #endif
611 
612  tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
613  nd = &tempdata->nd;
614  attr = &tempdata->attr;
615  imgp = &tempdata->image_params;
616 
617  /*
618  * Initialize part of the common data
619  */
620  imgp->proc = p;
621  imgp->attr = attr;
622  imgp->firstpage = NULL;
623  imgp->image_header = NULL;
624  imgp->object = NULL;
625  imgp->execlabel = NULL;
626 
627  NDINIT(nd, LOOKUP, MPSAFE|LOCKLEAF|FOLLOW, UIO_SYSSPACE, file,
628  curthread);
629  vfslocked = 0;
630  if ((error = namei(nd)) != 0) {
631  nd->ni_vp = NULL;
632  goto fail;
633  }
634  vfslocked = NDHASGIANT(nd);
635  NDFREE(nd, NDF_ONLY_PNBUF);
636  imgp->vp = nd->ni_vp;
637 
638  /*
639  * Check permissions, modes, uid, etc on the file, and "open" it.
640  */
641  error = exec_check_permissions(imgp);
642  if (error)
643  goto fail;
644 
645  error = exec_map_first_page(imgp);
646  if (error)
647  goto fail;
648 
649  /*
650  * Also make certain that the interpreter stays the same, so set
651  * its VV_TEXT flag, too.
652  */
653  VOP_SET_TEXT(nd->ni_vp);
654 
655  imgp->object = nd->ni_vp->v_object;
656 
657  hdr = (const Elf_Ehdr *)imgp->image_header;
658  if ((error = __elfN(check_header)(hdr)) != 0)
659  goto fail;
660  if (hdr->e_type == ET_DYN)
661  rbase = *addr;
662  else if (hdr->e_type == ET_EXEC)
663  rbase = 0;
664  else {
665  error = ENOEXEC;
666  goto fail;
667  }
668 
669  /* Only support headers that fit within first page for now */
670  if ((hdr->e_phoff > PAGE_SIZE) ||
671  (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) {
672  error = ENOEXEC;
673  goto fail;
674  }
675 
676  phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
677  if (!aligned(phdr, Elf_Addr)) {
678  error = ENOEXEC;
679  goto fail;
680  }
681 
682  for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
683  if (phdr[i].p_type == PT_LOAD && phdr[i].p_memsz != 0) {
684  /* Loadable segment */
685  prot = __elfN(trans_prot)(phdr[i].p_flags);
686  if ((error = __elfN(load_section)(vmspace,
687  imgp->object, phdr[i].p_offset,
688  (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase,
689  phdr[i].p_memsz, phdr[i].p_filesz, prot,
690  pagesize)) != 0)
691  goto fail;
692  /*
693  * Establish the base address if this is the
694  * first segment.
695  */
696  if (numsegs == 0)
697  base_addr = trunc_page(phdr[i].p_vaddr +
698  rbase);
699  numsegs++;
700  }
701  }
702  *addr = base_addr;
703  *entry = (unsigned long)hdr->e_entry + rbase;
704 
705 fail:
706  if (imgp->firstpage)
707  exec_unmap_first_page(imgp);
708 
709  if (nd->ni_vp)
710  vput(nd->ni_vp);
711 
712  VFS_UNLOCK_GIANT(vfslocked);
713  free(tempdata, M_TEMP);
714 
715  return (error);
716 }
717 
718 static int
719 __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
720 {
721  const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
722  const Elf_Phdr *phdr;
723  Elf_Auxargs *elf_auxargs;
724  struct vmspace *vmspace;
725  vm_prot_t prot;
726  u_long text_size = 0, data_size = 0, total_size = 0;
727  u_long text_addr = 0, data_addr = 0;
728  u_long seg_size, seg_addr;
729  u_long addr, baddr, et_dyn_addr, entry = 0, proghdr = 0;
730  int32_t osrel = 0;
731  int error = 0, i, n, interp_name_len = 0;
732  const char *interp = NULL, *newinterp = NULL;
733  Elf_Brandinfo *brand_info;
734  char *path;
735  struct sysentvec *sv;
736 
737  /*
738  * Do we have a valid ELF header ?
739  *
740  * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later
741  * if particular brand doesn't support it.
742  */
743  if (__elfN(check_header)(hdr) != 0 ||
744  (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
745  return (-1);
746 
747  /*
748  * From here on down, we return an errno, not -1, as we've
749  * detected an ELF file.
750  */
751 
752  if ((hdr->e_phoff > PAGE_SIZE) ||
753  (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) {
754  /* Only support headers in first page for now */
755  return (ENOEXEC);
756  }
757  phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
758  if (!aligned(phdr, Elf_Addr))
759  return (ENOEXEC);
760  n = 0;
761  baddr = 0;
762  for (i = 0; i < hdr->e_phnum; i++) {
763  switch (phdr[i].p_type) {
764  case PT_LOAD:
765  if (n == 0)
766  baddr = phdr[i].p_vaddr;
767  n++;
768  break;
769  case PT_INTERP:
770  /* Path to interpreter */
771  if (phdr[i].p_filesz > MAXPATHLEN ||
772  phdr[i].p_offset > PAGE_SIZE ||
773  phdr[i].p_filesz > PAGE_SIZE - phdr[i].p_offset)
774  return (ENOEXEC);
775  interp = imgp->image_header + phdr[i].p_offset;
776  interp_name_len = phdr[i].p_filesz;
777  break;
778  case PT_GNU_STACK:
779  if (__elfN(nxstack))
780  imgp->stack_prot =
781  __elfN(trans_prot)(phdr[i].p_flags);
782  break;
783  }
784  }
785 
786  brand_info = __elfN(get_brandinfo)(imgp, interp, interp_name_len,
787  &osrel);
788  if (brand_info == NULL) {
789  uprintf("ELF binary type \"%u\" not known.\n",
790  hdr->e_ident[EI_OSABI]);
791  return (ENOEXEC);
792  }
793  if (hdr->e_type == ET_DYN) {
794  if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0)
795  return (ENOEXEC);
796  /*
797  * Honour the base load address from the dso if it is
798  * non-zero for some reason.
799  */
800  if (baddr == 0)
801  et_dyn_addr = ET_DYN_LOAD_ADDR;
802  else
803  et_dyn_addr = 0;
804  } else
805  et_dyn_addr = 0;
806  sv = brand_info->sysvec;
807  if (interp != NULL && brand_info->interp_newpath != NULL)
808  newinterp = brand_info->interp_newpath;
809 
810  /*
811  * Avoid a possible deadlock if the current address space is destroyed
812  * and that address space maps the locked vnode. In the common case,
813  * the locked vnode's v_usecount is decremented but remains greater
814  * than zero. Consequently, the vnode lock is not needed by vrele().
815  * However, in cases where the vnode lock is external, such as nullfs,
816  * v_usecount may become zero.
817  */
818  VOP_UNLOCK(imgp->vp, 0);
819 
820  error = exec_new_vmspace(imgp, sv);
821  imgp->proc->p_sysent = sv;
822 
823  vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
824  if (error)
825  return (error);
826 
827  vmspace = imgp->proc->p_vmspace;
828 
829  for (i = 0; i < hdr->e_phnum; i++) {
830  switch (phdr[i].p_type) {
831  case PT_LOAD: /* Loadable segment */
832  if (phdr[i].p_memsz == 0)
833  break;
834  prot = __elfN(trans_prot)(phdr[i].p_flags);
835  if ((error = __elfN(load_section)(vmspace,
836  imgp->object, phdr[i].p_offset,
837  (caddr_t)(uintptr_t)phdr[i].p_vaddr + et_dyn_addr,
838  phdr[i].p_memsz, phdr[i].p_filesz, prot,
839  sv->sv_pagesize)) != 0)
840  return (error);
841 
842  /*
843  * If this segment contains the program headers,
844  * remember their virtual address for the AT_PHDR
845  * aux entry. Static binaries don't usually include
846  * a PT_PHDR entry.
847  */
848  if (phdr[i].p_offset == 0 &&
849  hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
850  <= phdr[i].p_filesz)
851  proghdr = phdr[i].p_vaddr + hdr->e_phoff +
852  et_dyn_addr;
853 
854  seg_addr = trunc_page(phdr[i].p_vaddr + et_dyn_addr);
855  seg_size = round_page(phdr[i].p_memsz +
856  phdr[i].p_vaddr + et_dyn_addr - seg_addr);
857 
858  /*
859  * Make the largest executable segment the official
860  * text segment and all others data.
861  *
862  * Note that obreak() assumes that data_addr +
863  * data_size == end of data load area, and the ELF
864  * file format expects segments to be sorted by
865  * address. If multiple data segments exist, the
866  * last one will be used.
867  */
868 
869  if (phdr[i].p_flags & PF_X && text_size < seg_size) {
870  text_size = seg_size;
871  text_addr = seg_addr;
872  } else {
873  data_size = seg_size;
874  data_addr = seg_addr;
875  }
876  total_size += seg_size;
877  break;
878  case PT_PHDR: /* Program header table info */
879  proghdr = phdr[i].p_vaddr + et_dyn_addr;
880  break;
881  default:
882  break;
883  }
884  }
885 
886  if (data_addr == 0 && data_size == 0) {
887  data_addr = text_addr;
888  data_size = text_size;
889  }
890 
891  entry = (u_long)hdr->e_entry + et_dyn_addr;
892 
893  /*
894  * Check limits. It should be safe to check the
895  * limits after loading the segments since we do
896  * not actually fault in all the segments pages.
897  */
898  PROC_LOCK(imgp->proc);
899  if (data_size > lim_cur(imgp->proc, RLIMIT_DATA) ||
900  text_size > maxtsiz ||
901  total_size > lim_cur(imgp->proc, RLIMIT_VMEM) ||
902  racct_set(imgp->proc, RACCT_DATA, data_size) != 0 ||
903  racct_set(imgp->proc, RACCT_VMEM, total_size) != 0) {
904  PROC_UNLOCK(imgp->proc);
905  return (ENOMEM);
906  }
907 
908  vmspace->vm_tsize = text_size >> PAGE_SHIFT;
909  vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
910  vmspace->vm_dsize = data_size >> PAGE_SHIFT;
911  vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;
912 
913  /*
914  * We load the dynamic linker where a userland call
915  * to mmap(0, ...) would put it. The rationale behind this
916  * calculation is that it leaves room for the heap to grow to
917  * its maximum allowed size.
918  */
919  addr = round_page((vm_offset_t)imgp->proc->p_vmspace->vm_daddr +
920  lim_max(imgp->proc, RLIMIT_DATA));
921  PROC_UNLOCK(imgp->proc);
922 
923  imgp->entry_addr = entry;
924 
925  if (interp != NULL) {
926  int have_interp = FALSE;
927  VOP_UNLOCK(imgp->vp, 0);
928  if (brand_info->emul_path != NULL &&
929  brand_info->emul_path[0] != '\0') {
930  path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
931  snprintf(path, MAXPATHLEN, "%s%s",
932  brand_info->emul_path, interp);
933  error = __elfN(load_file)(imgp->proc, path, &addr,
934  &imgp->entry_addr, sv->sv_pagesize);
935  free(path, M_TEMP);
936  if (error == 0)
937  have_interp = TRUE;
938  }
939  if (!have_interp && newinterp != NULL) {
940  error = __elfN(load_file)(imgp->proc, newinterp, &addr,
941  &imgp->entry_addr, sv->sv_pagesize);
942  if (error == 0)
943  have_interp = TRUE;
944  }
945  if (!have_interp) {
946  error = __elfN(load_file)(imgp->proc, interp, &addr,
947  &imgp->entry_addr, sv->sv_pagesize);
948  }
949  vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
950  if (error != 0) {
951  uprintf("ELF interpreter %s not found\n", interp);
952  return (error);
953  }
954  } else
955  addr = et_dyn_addr;
956 
957  /*
958  * Construct auxargs table (used by the fixup routine)
959  */
960  elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
961  elf_auxargs->execfd = -1;
962  elf_auxargs->phdr = proghdr;
963  elf_auxargs->phent = hdr->e_phentsize;
964  elf_auxargs->phnum = hdr->e_phnum;
965  elf_auxargs->pagesz = PAGE_SIZE;
966  elf_auxargs->base = addr;
967  elf_auxargs->flags = 0;
968  elf_auxargs->entry = entry;
969 
970  imgp->auxargs = elf_auxargs;
971  imgp->interpreted = 0;
972  imgp->reloc_base = addr;
973  imgp->proc->p_osrel = osrel;
974 
975  return (error);
976 }
977 
978 #define suword __CONCAT(suword, __ELF_WORD_SIZE)
979 
980 int
981 __elfN(bsdsunix_fixup)(register_t **stack_base, struct image_params *imgp)
982 {
983  Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
984  Elf_Addr *base;
985  Elf_Addr *pos;
986 
987  base = (Elf_Addr *)*stack_base;
988  pos = base + (imgp->args->argc + imgp->args->envc + 2);
989 
990  if (args->execfd != -1)
991  AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
992  AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
993  AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
994  AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
995  AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
996  AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
997  AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
998  AUXARGS_ENTRY(pos, AT_BASE, args->base);
999  if (imgp->execpathp != 0)
1000  AUXARGS_ENTRY(pos, AT_EXECPATH, imgp->execpathp);
1001  AUXARGS_ENTRY(pos, AT_OSRELDATE, osreldate);
1002  if (imgp->canary != 0) {
1003  AUXARGS_ENTRY(pos, AT_CANARY, imgp->canary);
1004  AUXARGS_ENTRY(pos, AT_CANARYLEN, imgp->canarylen);
1005  }
1006  AUXARGS_ENTRY(pos, AT_NCPUS, mp_ncpus);
1007  if (imgp->pagesizes != 0) {
1008  AUXARGS_ENTRY(pos, AT_PAGESIZES, imgp->pagesizes);
1009  AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen);
1010  }
1011  if (imgp->sysent->sv_timekeep_base != 0) {
1012  AUXARGS_ENTRY(pos, AT_TIMEKEEP,
1013  imgp->sysent->sv_timekeep_base);
1014  }
1015  AUXARGS_ENTRY(pos, AT_STACKPROT, imgp->sysent->sv_shared_page_obj
1016  != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
1017  imgp->sysent->sv_stackprot);
1018  AUXARGS_ENTRY(pos, AT_NULL, 0);
1019 
1020  free(imgp->auxargs, M_TEMP);
1021  imgp->auxargs = NULL;
1022 
1023  base--;
1024  suword(base, (long)imgp->args->argc);
1025  *stack_base = (register_t *)base;
1026  return (0);
1027 }
1028 
1029 /*
1030  * Code for generating ELF core dumps.
1031  */
1032 
1033 typedef void (*segment_callback)(vm_map_entry_t, void *);
1034 
1035 /* Closure for cb_put_phdr(). */
1037  Elf_Phdr *phdr; /* Program header to fill in */
1038  Elf_Off offset; /* Offset of segment in core file */
1039 };
1040 
/* Closure for cb_size_segment(). */
struct sseg_closure {
	int count;		/* Count of writable segments. */
	size_t size;		/* Total size of all writable segments. */
};
1046 
1047 typedef void (*outfunc_t)(void *, struct sbuf *, size_t *);
1048 
1049 struct note_info {
1050  int type; /* Note type. */
1051  outfunc_t outfunc; /* Output function. */
1052  void *outarg; /* Argument for the output function. */
1053  size_t outsize; /* Output size. */
1054  TAILQ_ENTRY(note_info) link; /* Link to the next note info. */
1055 };
1056 
1057 TAILQ_HEAD(note_info_list, note_info);
1058 
1059 static void cb_put_phdr(vm_map_entry_t, void *);
1060 static void cb_size_segment(vm_map_entry_t, void *);
1061 static void each_writable_segment(struct thread *, segment_callback, void *);
1062 static int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *,
1063  int, void *, size_t, struct note_info_list *, size_t, gzFile);
1064 static void __elfN(prepare_notes)(struct thread *, struct note_info_list *,
1065  size_t *);
1066 static void __elfN(puthdr)(struct thread *, void *, size_t, int, size_t);
1067 static void __elfN(putnote)(struct note_info *, struct sbuf *);
1068 static size_t register_note(struct note_info_list *, int, outfunc_t, void *);
1069 static int sbuf_drain_core_output(void *, const char *, int);
1070 static int sbuf_drain_count(void *arg, const char *data, int len);
1071 
1072 static void __elfN(note_fpregset)(void *, struct sbuf *, size_t *);
1073 static void __elfN(note_prpsinfo)(void *, struct sbuf *, size_t *);
1074 static void __elfN(note_prstatus)(void *, struct sbuf *, size_t *);
1075 static void __elfN(note_threadmd)(void *, struct sbuf *, size_t *);
1076 static void __elfN(note_thrmisc)(void *, struct sbuf *, size_t *);
1077 static void __elfN(note_procstat_auxv)(void *, struct sbuf *, size_t *);
1078 static void __elfN(note_procstat_proc)(void *, struct sbuf *, size_t *);
1079 static void __elfN(note_procstat_psstrings)(void *, struct sbuf *, size_t *);
1080 static void note_procstat_files(void *, struct sbuf *, size_t *);
1081 static void note_procstat_groups(void *, struct sbuf *, size_t *);
1082 static void note_procstat_osrel(void *, struct sbuf *, size_t *);
1083 static void note_procstat_rlimit(void *, struct sbuf *, size_t *);
1084 static void note_procstat_umask(void *, struct sbuf *, size_t *);
1085 static void note_procstat_vmmap(void *, struct sbuf *, size_t *);
1086 
#ifdef COMPRESS_USER_CORES
/*
 * Settings for compressed user core dumps.  The gzip level is consulted by
 * __elfN(coredump)() when it opens the compressed stream.
 */
extern int compress_user_cores_gzlevel;
extern int compress_user_cores;
#endif
1091 
1092 static int
1093 core_output(struct vnode *vp, void *base, size_t len, off_t offset,
1094  struct ucred *active_cred, struct ucred *file_cred,
1095  struct thread *td, char *core_buf, gzFile gzfile) {
1096 
1097  int error;
1098  if (gzfile) {
1099 #ifdef COMPRESS_USER_CORES
1100  error = compress_core(gzfile, base, core_buf, len, td);
1101 #else
1102  panic("shouldn't be here");
1103 #endif
1104  } else {
1105  error = vn_rdwr_inchunks(UIO_WRITE, vp, base, len, offset,
1106  UIO_USERSPACE, IO_UNIT | IO_DIRECT, active_cred, file_cred,
1107  NULL, td);
1108  }
1109  return (error);
1110 }
1111 
1112 /* Coredump output parameters for sbuf drain routine. */
1114  off_t offset;
1115  struct ucred *active_cred;
1116  struct ucred *file_cred;
1117  struct thread *td;
1118  struct vnode *vp;
1119 #ifdef COMPRESS_USER_CORES
1120  gzFile gzfile;
1121 #endif
1122 };
1123 
1124 /*
1125  * Drain into a core file.
1126  */
1127 static int
1128 sbuf_drain_core_output(void *arg, const char *data, int len)
1129 {
1130  struct sbuf_drain_core_params *p;
1131  int error, locked;
1132 
1133  p = (struct sbuf_drain_core_params *)arg;
1134 
1135  /*
1136  * Some kern_proc out routines that print to this sbuf may
1137  * call us with the process lock held. Draining with the
1138  * non-sleepable lock held is unsafe. The lock is needed for
1139  * those routines when dumping a live process. In our case we
1140  * can safely release the lock before draining and acquire
1141  * again after.
1142  */
1143  locked = PROC_LOCKED(p->td->td_proc);
1144  if (locked)
1145  PROC_UNLOCK(p->td->td_proc);
1146 #ifdef COMPRESS_USER_CORES
1147  if (p->gzfile != Z_NULL)
1148  error = compress_core(p->gzfile, NULL, __DECONST(char *, data),
1149  len, p->td);
1150  else
1151 #endif
1152  error = vn_rdwr_inchunks(UIO_WRITE, p->vp,
1153  __DECONST(void *, data), len, p->offset, UIO_SYSSPACE,
1154  IO_UNIT | IO_DIRECT, p->active_cred, p->file_cred, NULL,
1155  p->td);
1156  if (locked)
1157  PROC_LOCK(p->td->td_proc);
1158  if (error != 0)
1159  return (-error);
1160  p->offset += len;
1161  return (len);
1162 }
1163 
1164 /*
1165  * Drain into a counter.
1166  */
1167 static int
1168 sbuf_drain_count(void *arg, const char *data __unused, int len)
1169 {
1170  size_t *sizep;
1171 
1172  sizep = (size_t *)arg;
1173  *sizep += len;
1174  return (len);
1175 }
1176 
1177 int
1178 __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags)
1179 {
1180  struct ucred *cred = td->td_ucred;
1181  int error = 0;
1182  struct sseg_closure seginfo;
1183  struct note_info_list notelst;
1184  struct note_info *ninfo;
1185  void *hdr;
1186  size_t hdrsize, notesz, coresize;
1187 
1188  gzFile gzfile = Z_NULL;
1189  char *core_buf = NULL;
1190 #ifdef COMPRESS_USER_CORES
1191  char gzopen_flags[8];
1192  char *p;
1193  int doing_compress = flags & IMGACT_CORE_COMPRESS;
1194 #endif
1195 
1196  hdr = NULL;
1197  TAILQ_INIT(&notelst);
1198 
1199 #ifdef COMPRESS_USER_CORES
1200  if (doing_compress) {
1201  p = gzopen_flags;
1202  *p++ = 'w';
1203  if (compress_user_cores_gzlevel >= 0 &&
1204  compress_user_cores_gzlevel <= 9)
1205  *p++ = '0' + compress_user_cores_gzlevel;
1206  *p = 0;
1207  gzfile = gz_open("", gzopen_flags, vp);
1208  if (gzfile == Z_NULL) {
1209  error = EFAULT;
1210  goto done;
1211  }
1212  core_buf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO);
1213  if (!core_buf) {
1214  error = ENOMEM;
1215  goto done;
1216  }
1217  }
1218 #endif
1219 
1220  /* Size the program segments. */
1221  seginfo.count = 0;
1222  seginfo.size = 0;
1223  each_writable_segment(td, cb_size_segment, &seginfo);
1224 
1225  /*
1226  * Collect info about the core file header area.
1227  */
1228  hdrsize = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * (1 + seginfo.count);
1229  __elfN(prepare_notes)(td, &notelst, &notesz);
1230  coresize = round_page(hdrsize + notesz) + seginfo.size;
1231 
1232 #ifdef RACCT
1233  PROC_LOCK(td->td_proc);
1234  error = racct_add(td->td_proc, RACCT_CORE, coresize);
1235  PROC_UNLOCK(td->td_proc);
1236  if (error != 0) {
1237  error = EFAULT;
1238  goto done;
1239  }
1240 #endif
1241  if (coresize >= limit) {
1242  error = EFAULT;
1243  goto done;
1244  }
1245 
1246  /*
1247  * Allocate memory for building the header, fill it up,
1248  * and write it out following the notes.
1249  */
1250  hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
1251  if (hdr == NULL) {
1252  error = EINVAL;
1253  goto done;
1254  }
1255  error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize,
1256  &notelst, notesz, gzfile);
1257 
1258  /* Write the contents of all of the writable segments. */
1259  if (error == 0) {
1260  Elf_Phdr *php;
1261  off_t offset;
1262  int i;
1263 
1264  php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
1265  offset = round_page(hdrsize + notesz);
1266  for (i = 0; i < seginfo.count; i++) {
1267  error = core_output(vp, (caddr_t)(uintptr_t)php->p_vaddr,
1268  php->p_filesz, offset, cred, NOCRED, curthread, core_buf, gzfile);
1269  if (error != 0)
1270  break;
1271  offset += php->p_filesz;
1272  php++;
1273  }
1274  }
1275  if (error) {
1276  log(LOG_WARNING,
1277  "Failed to write core file for process %s (error %d)\n",
1278  curproc->p_comm, error);
1279  }
1280 
1281 done:
1282 #ifdef COMPRESS_USER_CORES
1283  if (core_buf)
1284  free(core_buf, M_TEMP);
1285  if (gzfile)
1286  gzclose(gzfile);
1287 #endif
1288  while ((ninfo = TAILQ_FIRST(&notelst)) != NULL) {
1289  TAILQ_REMOVE(&notelst, ninfo, link);
1290  free(ninfo, M_TEMP);
1291  }
1292  if (hdr != NULL)
1293  free(hdr, M_TEMP);
1294 
1295  return (error);
1296 }
1297 
1298 /*
1299  * A callback for each_writable_segment() to write out the segment's
1300  * program header entry.
1301  */
1302 static void
1303 cb_put_phdr(entry, closure)
1304  vm_map_entry_t entry;
1305  void *closure;
1306 {
1307  struct phdr_closure *phc = (struct phdr_closure *)closure;
1308  Elf_Phdr *phdr = phc->phdr;
1309 
1310  phc->offset = round_page(phc->offset);
1311 
1312  phdr->p_type = PT_LOAD;
1313  phdr->p_offset = phc->offset;
1314  phdr->p_vaddr = entry->start;
1315  phdr->p_paddr = 0;
1316  phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
1317  phdr->p_align = PAGE_SIZE;
1318  phdr->p_flags = __elfN(untrans_prot)(entry->protection);
1319 
1320  phc->offset += phdr->p_filesz;
1321  phc->phdr++;
1322 }
1323 
1324 /*
1325  * A callback for each_writable_segment() to gather information about
1326  * the number of segments and their total size.
1327  */
1328 static void
1329 cb_size_segment(entry, closure)
1330  vm_map_entry_t entry;
1331  void *closure;
1332 {
1333  struct sseg_closure *ssc = (struct sseg_closure *)closure;
1334 
1335  ssc->count++;
1336  ssc->size += entry->end - entry->start;
1337 }
1338 
1339 /*
1340  * For each writable segment in the process's memory map, call the given
1341  * function with a pointer to the map entry and some arbitrary
1342  * caller-supplied data.
1343  */
1344 static void
1345 each_writable_segment(td, func, closure)
1346  struct thread *td;
1347  segment_callback func;
1348  void *closure;
1349 {
1350  struct proc *p = td->td_proc;
1351  vm_map_t map = &p->p_vmspace->vm_map;
1352  vm_map_entry_t entry;
1353  vm_object_t backing_object, object;
1354  boolean_t ignore_entry;
1355 
1356  vm_map_lock_read(map);
1357  for (entry = map->header.next; entry != &map->header;
1358  entry = entry->next) {
1359  /*
1360  * Don't dump inaccessible mappings, deal with legacy
1361  * coredump mode.
1362  *
1363  * Note that read-only segments related to the elf binary
1364  * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
1365  * need to arbitrarily ignore such segments.
1366  */
1367  if (elf_legacy_coredump) {
1368  if ((entry->protection & VM_PROT_RW) != VM_PROT_RW)
1369  continue;
1370  } else {
1371  if ((entry->protection & VM_PROT_ALL) == 0)
1372  continue;
1373  }
1374 
1375  /*
1376  * Dont include memory segment in the coredump if
1377  * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
1378  * madvise(2). Do not dump submaps (i.e. parts of the
1379  * kernel map).
1380  */
1381  if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP))
1382  continue;
1383 
1384  if ((object = entry->object.vm_object) == NULL)
1385  continue;
1386 
1387  /* Ignore memory-mapped devices and such things. */
1388  VM_OBJECT_LOCK(object);
1389  while ((backing_object = object->backing_object) != NULL) {
1390  VM_OBJECT_LOCK(backing_object);
1391  VM_OBJECT_UNLOCK(object);
1392  object = backing_object;
1393  }
1394  ignore_entry = object->type != OBJT_DEFAULT &&
1395  object->type != OBJT_SWAP && object->type != OBJT_VNODE &&
1396  object->type != OBJT_PHYS;
1397  VM_OBJECT_UNLOCK(object);
1398  if (ignore_entry)
1399  continue;
1400 
1401  (*func)(entry, closure);
1402  }
1403  vm_map_unlock_read(map);
1404 }
1405 
1406 /*
1407  * Write the core file header to the file, including padding up to
1408  * the page boundary.
1409  */
1410 static int
1411 __elfN(corehdr)(struct thread *td, struct vnode *vp, struct ucred *cred,
1412  int numsegs, void *hdr, size_t hdrsize, struct note_info_list *notelst,
1413  size_t notesz, gzFile gzfile)
1414 {
1415  struct sbuf_drain_core_params params;
1416  struct note_info *ninfo;
1417  struct sbuf *sb;
1418  int error;
1419 
1420  /* Fill in the header. */
1421  bzero(hdr, hdrsize);
1422  __elfN(puthdr)(td, hdr, hdrsize, numsegs, notesz);
1423 
1424  params.offset = 0;
1425  params.active_cred = cred;
1426  params.file_cred = NOCRED;
1427  params.td = td;
1428  params.vp = vp;
1429 #ifdef COMPRESS_USER_CORES
1430  params.gzfile = gzfile;
1431 #endif
1432  sb = sbuf_new(NULL, NULL, CORE_BUF_SIZE, SBUF_FIXEDLEN);
1433  sbuf_set_drain(sb, sbuf_drain_core_output, &params);
1434  sbuf_start_section(sb, NULL);
1435  sbuf_bcat(sb, hdr, hdrsize);
1436  TAILQ_FOREACH(ninfo, notelst, link)
1437  __elfN(putnote)(ninfo, sb);
1438  /* Align up to a page boundary for the program segments. */
1439  sbuf_end_section(sb, -1, PAGE_SIZE, 0);
1440  error = sbuf_finish(sb);
1441  sbuf_delete(sb);
1442 
1443  return (error);
1444 }
1445 
1446 static void
1447 __elfN(prepare_notes)(struct thread *td, struct note_info_list *list,
1448  size_t *sizep)
1449 {
1450  struct proc *p;
1451  struct thread *thr;
1452  size_t size;
1453 
1454  p = td->td_proc;
1455  size = 0;
1456 
1457  size += register_note(list, NT_PRPSINFO, __elfN(note_prpsinfo), p);
1458 
1459  /*
1460  * To have the debugger select the right thread (LWP) as the initial
1461  * thread, we dump the state of the thread passed to us in td first.
1462  * This is the thread that causes the core dump and thus likely to
1463  * be the right thread one wants to have selected in the debugger.
1464  */
1465  thr = td;
1466  while (thr != NULL) {
1467  size += register_note(list, NT_PRSTATUS,
1468  __elfN(note_prstatus), thr);
1469  size += register_note(list, NT_FPREGSET,
1470  __elfN(note_fpregset), thr);
1471  size += register_note(list, NT_THRMISC,
1472  __elfN(note_thrmisc), thr);
1473  size += register_note(list, -1,
1474  __elfN(note_threadmd), thr);
1475 
1476  thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) :
1477  TAILQ_NEXT(thr, td_plist);
1478  if (thr == td)
1479  thr = TAILQ_NEXT(thr, td_plist);
1480  }
1481 
1482  size += register_note(list, NT_PROCSTAT_PROC,
1484  size += register_note(list, NT_PROCSTAT_FILES,
1485  note_procstat_files, p);
1486  size += register_note(list, NT_PROCSTAT_VMMAP,
1487  note_procstat_vmmap, p);
1488  size += register_note(list, NT_PROCSTAT_GROUPS,
1490  size += register_note(list, NT_PROCSTAT_UMASK,
1491  note_procstat_umask, p);
1492  size += register_note(list, NT_PROCSTAT_RLIMIT,
1494  size += register_note(list, NT_PROCSTAT_OSREL,
1495  note_procstat_osrel, p);
1496  size += register_note(list, NT_PROCSTAT_PSSTRINGS,
1498  size += register_note(list, NT_PROCSTAT_AUXV,
1500 
1501  *sizep = size;
1502 }
1503 
1504 static void
1505 __elfN(puthdr)(struct thread *td, void *hdr, size_t hdrsize, int numsegs,
1506  size_t notesz)
1507 {
1508  Elf_Ehdr *ehdr;
1509  Elf_Phdr *phdr;
1510  struct phdr_closure phc;
1511 
1512  ehdr = (Elf_Ehdr *)hdr;
1513  phdr = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr));
1514 
1515  ehdr->e_ident[EI_MAG0] = ELFMAG0;
1516  ehdr->e_ident[EI_MAG1] = ELFMAG1;
1517  ehdr->e_ident[EI_MAG2] = ELFMAG2;
1518  ehdr->e_ident[EI_MAG3] = ELFMAG3;
1519  ehdr->e_ident[EI_CLASS] = ELF_CLASS;
1520  ehdr->e_ident[EI_DATA] = ELF_DATA;
1521  ehdr->e_ident[EI_VERSION] = EV_CURRENT;
1522  ehdr->e_ident[EI_OSABI] = ELFOSABI_BSDSUNIX;
1523  ehdr->e_ident[EI_ABIVERSION] = 0;
1524  ehdr->e_ident[EI_PAD] = 0;
1525  ehdr->e_type = ET_CORE;
1526 #if defined(COMPAT_32BIT) && __ELF_WORD_SIZE == 32
1527  ehdr->e_machine = ELF_ARCH32;
1528 #else
1529  ehdr->e_machine = ELF_ARCH;
1530 #endif
1531  ehdr->e_version = EV_CURRENT;
1532  ehdr->e_entry = 0;
1533  ehdr->e_phoff = sizeof(Elf_Ehdr);
1534  ehdr->e_flags = 0;
1535  ehdr->e_ehsize = sizeof(Elf_Ehdr);
1536  ehdr->e_phentsize = sizeof(Elf_Phdr);
1537  ehdr->e_phnum = numsegs + 1;
1538  ehdr->e_shentsize = sizeof(Elf_Shdr);
1539  ehdr->e_shnum = 0;
1540  ehdr->e_shstrndx = SHN_UNDEF;
1541 
1542  /*
1543  * Fill in the program header entries.
1544  */
1545 
1546  /* The note segement. */
1547  phdr->p_type = PT_NOTE;
1548  phdr->p_offset = hdrsize;
1549  phdr->p_vaddr = 0;
1550  phdr->p_paddr = 0;
1551  phdr->p_filesz = notesz;
1552  phdr->p_memsz = 0;
1553  phdr->p_flags = PF_R;
1554  phdr->p_align = ELF_NOTE_ROUNDSIZE;
1555  phdr++;
1556 
1557  /* All the writable segments from the program. */
1558  phc.phdr = phdr;
1559  phc.offset = round_page(hdrsize + notesz);
1561 }
1562 
1563 static size_t
1564 register_note(struct note_info_list *list, int type, outfunc_t out, void *arg)
1565 {
1566  struct note_info *ninfo;
1567  size_t size, notesize;
1568 
1569  size = 0;
1570  out(arg, NULL, &size);
1571  ninfo = malloc(sizeof(*ninfo), M_TEMP, M_ZERO | M_WAITOK);
1572  ninfo->type = type;
1573  ninfo->outfunc = out;
1574  ninfo->outarg = arg;
1575  ninfo->outsize = size;
1576  TAILQ_INSERT_TAIL(list, ninfo, link);
1577 
1578  if (type == -1)
1579  return (size);
1580 
1581  notesize = sizeof(Elf_Note) + /* note header */
1582  roundup2(9, ELF_NOTE_ROUNDSIZE) + /* note name ("BSDSUniX") */
1583  roundup2(size, ELF_NOTE_ROUNDSIZE); /* note description */
1584 
1585  return (notesize);
1586 }
1587 
1588 static void
1589 __elfN(putnote)(struct note_info *ninfo, struct sbuf *sb)
1590 {
1591  Elf_Note note;
1592  ssize_t old_len;
1593 
1594  if (ninfo->type == -1) {
1595  ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
1596  return;
1597  }
1598 
1599  note.n_namesz = 9; /* strlen("BSDSUniX") + 1 */
1600  note.n_descsz = ninfo->outsize;
1601  note.n_type = ninfo->type;
1602 
1603  sbuf_bcat(sb, &note, sizeof(note));
1604  sbuf_start_section(sb, &old_len);
1605  sbuf_bcat(sb, "BSDSUniX", note.n_namesz);
1606  sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
1607  if (note.n_descsz == 0)
1608  return;
1609  sbuf_start_section(sb, &old_len);
1610  ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
1611  sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
1612 }
1613 
1614 /*
1615  * Miscellaneous note out functions.
1616  */
1617 
1618 #if defined(COMPAT_32BIT) && __ELF_WORD_SIZE == 32
1619 #include <compat/compat32bit/compat32bit.h>
1620 
1621 typedef struct prstatus32 elf_prstatus_t;
1622 typedef struct prpsinfo32 elf_prpsinfo_t;
1623 typedef struct fpreg32 elf_prfpregset_t;
1624 typedef struct fpreg32 elf_fpregset_t;
1625 typedef struct reg32 elf_gregset_t;
1626 typedef struct thrmisc32 elf_thrmisc_t;
1627 #define ELF_KERN_PROC_MASK KERN_PROC_MASK32
1628 typedef struct kinfo_proc32 elf_kinfo_proc_t;
1629 typedef uint32_t elf_ps_strings_t;
1630 #else
1631 typedef prstatus_t elf_prstatus_t;
1632 typedef prpsinfo_t elf_prpsinfo_t;
1633 typedef prfpregset_t elf_prfpregset_t;
1634 typedef prfpregset_t elf_fpregset_t;
1635 typedef gregset_t elf_gregset_t;
1636 typedef thrmisc_t elf_thrmisc_t;
1637 #define ELF_KERN_PROC_MASK 0
1638 typedef struct kinfo_proc elf_kinfo_proc_t;
1639 typedef vm_offset_t elf_ps_strings_t;
1640 #endif
1641 
1642 static void
1643 __elfN(note_prpsinfo)(void *arg, struct sbuf *sb, size_t *sizep)
1644 {
1645  struct proc *p;
1646  elf_prpsinfo_t *psinfo;
1647 
1648  p = (struct proc *)arg;
1649  if (sb != NULL) {
1650  KASSERT(*sizep == sizeof(*psinfo), ("invalid size"));
1651  psinfo = malloc(sizeof(*psinfo), M_TEMP, M_ZERO | M_WAITOK);
1652  psinfo->pr_version = PRPSINFO_VERSION;
1653  psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t);
1654  strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname));
1655  /*
1656  * XXX - We don't fill in the command line arguments properly
1657  * yet.
1658  */
1659  strlcpy(psinfo->pr_psargs, p->p_comm,
1660  sizeof(psinfo->pr_psargs));
1661 
1662  sbuf_bcat(sb, psinfo, sizeof(*psinfo));
1663  free(psinfo, M_TEMP);
1664  }
1665  *sizep = sizeof(*psinfo);
1666 }
1667 
1668 static void
1669 __elfN(note_prstatus)(void *arg, struct sbuf *sb, size_t *sizep)
1670 {
1671  struct thread *td;
1672  elf_prstatus_t *status;
1673 
1674  td = (struct thread *)arg;
1675  if (sb != NULL) {
1676  KASSERT(*sizep == sizeof(*status), ("invalid size"));
1677  status = malloc(sizeof(*status), M_TEMP, M_ZERO | M_WAITOK);
1678  status->pr_version = PRSTATUS_VERSION;
1679  status->pr_statussz = sizeof(elf_prstatus_t);
1680  status->pr_gregsetsz = sizeof(elf_gregset_t);
1681  status->pr_fpregsetsz = sizeof(elf_fpregset_t);
1682  status->pr_osreldate = osreldate;
1683  status->pr_cursig = td->td_proc->p_sig;
1684  status->pr_pid = td->td_tid;
1685 #if defined(COMPAT_32BIT) && __ELF_WORD_SIZE == 32
1686  fill_regs32(td, &status->pr_reg);
1687 #else
1688  fill_regs(td, &status->pr_reg);
1689 #endif
1690  sbuf_bcat(sb, status, sizeof(*status));
1691  free(status, M_TEMP);
1692  }
1693  *sizep = sizeof(*status);
1694 }
1695 
1696 static void
1697 __elfN(note_fpregset)(void *arg, struct sbuf *sb, size_t *sizep)
1698 {
1699  struct thread *td;
1700  elf_prfpregset_t *fpregset;
1701 
1702  td = (struct thread *)arg;
1703  if (sb != NULL) {
1704  KASSERT(*sizep == sizeof(*fpregset), ("invalid size"));
1705  fpregset = malloc(sizeof(*fpregset), M_TEMP, M_ZERO | M_WAITOK);
1706 #if defined(COMPAT_32BIT) && __ELF_WORD_SIZE == 32
1707  fill_fpregs32(td, fpregset);
1708 #else
1709  fill_fpregs(td, fpregset);
1710 #endif
1711  sbuf_bcat(sb, fpregset, sizeof(*fpregset));
1712  free(fpregset, M_TEMP);
1713  }
1714  *sizep = sizeof(*fpregset);
1715 }
1716 
1717 static void
1718 __elfN(note_thrmisc)(void *arg, struct sbuf *sb, size_t *sizep)
1719 {
1720  struct thread *td;
1721  elf_thrmisc_t thrmisc;
1722 
1723  td = (struct thread *)arg;
1724  if (sb != NULL) {
1725  KASSERT(*sizep == sizeof(thrmisc), ("invalid size"));
1726  bzero(&thrmisc._pad, sizeof(thrmisc._pad));
1727  strcpy(thrmisc.pr_tname, td->td_name);
1728  sbuf_bcat(sb, &thrmisc, sizeof(thrmisc));
1729  }
1730  *sizep = sizeof(thrmisc);
1731 }
1732 
1733 /*
1734  * Allow for MD specific notes, as well as any MD
1735  * specific preparations for writing MI notes.
1736  */
1737 static void
1738 __elfN(note_threadmd)(void *arg, struct sbuf *sb, size_t *sizep)
1739 {
1740  struct thread *td;
1741  void *buf;
1742  size_t size;
1743 
1744  td = (struct thread *)arg;
1745  size = *sizep;
1746  if (size != 0 && sb != NULL)
1747  buf = malloc(size, M_TEMP, M_ZERO | M_WAITOK);
1748  else
1749  buf = NULL;
1750  size = 0;
1751  __elfN(dump_thread)(td, buf, &size);
1752  KASSERT(*sizep == size, ("invalid size"));
1753  if (size != 0 && sb != NULL)
1754  sbuf_bcat(sb, buf, size);
1755  free(buf, M_TEMP);
1756  *sizep = size;
1757 }
1758 
1759 #ifdef KINFO_PROC_SIZE
1760 CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
1761 #endif
1762 
1763 static void
1764 __elfN(note_procstat_proc)(void *arg, struct sbuf *sb, size_t *sizep)
1765 {
1766  struct proc *p;
1767  size_t size;
1768  int structsize;
1769 
1770  p = (struct proc *)arg;
1771  size = sizeof(structsize) + p->p_numthreads *
1772  sizeof(elf_kinfo_proc_t);
1773 
1774  if (sb != NULL) {
1775  KASSERT(*sizep == size, ("invalid size"));
1776  structsize = sizeof(elf_kinfo_proc_t);
1777  sbuf_bcat(sb, &structsize, sizeof(structsize));
1778  PROC_LOCK(p);
1780  }
1781  *sizep = size;
1782 }
1783 
1784 #ifdef KINFO_FILE_SIZE
1785 CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
1786 #endif
1787 
1788 static void
1789 note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep)
1790 {
1791  struct proc *p;
1792  size_t size;
1793  int structsize;
1794 
1795  p = (struct proc *)arg;
1796  if (sb == NULL) {
1797  size = 0;
1798  sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
1799  sbuf_set_drain(sb, sbuf_drain_count, &size);
1800  sbuf_bcat(sb, &structsize, sizeof(structsize));
1801  PROC_LOCK(p);
1802  kern_proc_filedesc_out(p, sb, -1);
1803  sbuf_finish(sb);
1804  sbuf_delete(sb);
1805  *sizep = size;
1806  } else {
1807  structsize = sizeof(struct kinfo_file);
1808  sbuf_bcat(sb, &structsize, sizeof(structsize));
1809  PROC_LOCK(p);
1810  kern_proc_filedesc_out(p, sb, -1);
1811  }
1812 }
1813 
1814 #ifdef KINFO_VMENTRY_SIZE
1815 CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE);
1816 #endif
1817 
1818 static void
1819 note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep)
1820 {
1821  struct proc *p;
1822  size_t size;
1823  int structsize;
1824 
1825  p = (struct proc *)arg;
1826  if (sb == NULL) {
1827  size = 0;
1828  sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
1829  sbuf_set_drain(sb, sbuf_drain_count, &size);
1830  sbuf_bcat(sb, &structsize, sizeof(structsize));
1831  PROC_LOCK(p);
1832  kern_proc_vmmap_out(p, sb);
1833  sbuf_finish(sb);
1834  sbuf_delete(sb);
1835  *sizep = size;
1836  } else {
1837  structsize = sizeof(struct kinfo_vmentry);
1838  sbuf_bcat(sb, &structsize, sizeof(structsize));
1839  PROC_LOCK(p);
1840  kern_proc_vmmap_out(p, sb);
1841  }
1842 }
1843 
1844 static void
1845 note_procstat_groups(void *arg, struct sbuf *sb, size_t *sizep)
1846 {
1847  struct proc *p;
1848  size_t size;
1849  int structsize;
1850 
1851  p = (struct proc *)arg;
1852  size = sizeof(structsize) + p->p_ucred->cr_ngroups * sizeof(gid_t);
1853  if (sb != NULL) {
1854  KASSERT(*sizep == size, ("invalid size"));
1855  structsize = sizeof(gid_t);
1856  sbuf_bcat(sb, &structsize, sizeof(structsize));
1857  sbuf_bcat(sb, p->p_ucred->cr_groups, p->p_ucred->cr_ngroups *
1858  sizeof(gid_t));
1859  }
1860  *sizep = size;
1861 }
1862 
1863 static void
1864 note_procstat_umask(void *arg, struct sbuf *sb, size_t *sizep)
1865 {
1866  struct proc *p;
1867  size_t size;
1868  int structsize;
1869 
1870  p = (struct proc *)arg;
1871  size = sizeof(structsize) + sizeof(p->p_fd->fd_cmask);
1872  if (sb != NULL) {
1873  KASSERT(*sizep == size, ("invalid size"));
1874  structsize = sizeof(p->p_fd->fd_cmask);
1875  sbuf_bcat(sb, &structsize, sizeof(structsize));
1876  sbuf_bcat(sb, &p->p_fd->fd_cmask, sizeof(p->p_fd->fd_cmask));
1877  }
1878  *sizep = size;
1879 }
1880 
1881 static void
1882 note_procstat_rlimit(void *arg, struct sbuf *sb, size_t *sizep)
1883 {
1884  struct proc *p;
1885  struct rlimit rlim[RLIM_NLIMITS];
1886  size_t size;
1887  int structsize, i;
1888 
1889  p = (struct proc *)arg;
1890  size = sizeof(structsize) + sizeof(rlim);
1891  if (sb != NULL) {
1892  KASSERT(*sizep == size, ("invalid size"));
1893  structsize = sizeof(rlim);
1894  sbuf_bcat(sb, &structsize, sizeof(structsize));
1895  PROC_LOCK(p);
1896  for (i = 0; i < RLIM_NLIMITS; i++)
1897  lim_rlimit(p, i, &rlim[i]);
1898  PROC_UNLOCK(p);
1899  sbuf_bcat(sb, rlim, sizeof(rlim));
1900  }
1901  *sizep = size;
1902 }
1903 
1904 static void
1905 note_procstat_osrel(void *arg, struct sbuf *sb, size_t *sizep)
1906 {
1907  struct proc *p;
1908  size_t size;
1909  int structsize;
1910 
1911  p = (struct proc *)arg;
1912  size = sizeof(structsize) + sizeof(p->p_osrel);
1913  if (sb != NULL) {
1914  KASSERT(*sizep == size, ("invalid size"));
1915  structsize = sizeof(p->p_osrel);
1916  sbuf_bcat(sb, &structsize, sizeof(structsize));
1917  sbuf_bcat(sb, &p->p_osrel, sizeof(p->p_osrel));
1918  }
1919  *sizep = size;
1920 }
1921 
1922 static void
1923 __elfN(note_procstat_psstrings)(void *arg, struct sbuf *sb, size_t *sizep)
1924 {
1925  struct proc *p;
1926  elf_ps_strings_t ps_strings;
1927  size_t size;
1928  int structsize;
1929 
1930  p = (struct proc *)arg;
1931  size = sizeof(structsize) + sizeof(ps_strings);
1932  if (sb != NULL) {
1933  KASSERT(*sizep == size, ("invalid size"));
1934  structsize = sizeof(ps_strings);
1935 #if defined(COMPAT_32BIT) && __ELF_WORD_SIZE == 32
1936  ps_strings = PTROUT(p->p_sysent->sv_psstrings);
1937 #else
1938  ps_strings = p->p_sysent->sv_psstrings;
1939 #endif
1940  sbuf_bcat(sb, &structsize, sizeof(structsize));
1941  sbuf_bcat(sb, &ps_strings, sizeof(ps_strings));
1942  }
1943  *sizep = size;
1944 }
1945 
1946 static void
1947 __elfN(note_procstat_auxv)(void *arg, struct sbuf *sb, size_t *sizep)
1948 {
1949  struct proc *p;
1950  size_t size;
1951  int structsize;
1952 
1953  p = (struct proc *)arg;
1954  if (sb == NULL) {
1955  size = 0;
1956  sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
1957  sbuf_set_drain(sb, sbuf_drain_count, &size);
1958  sbuf_bcat(sb, &structsize, sizeof(structsize));
1959  PHOLD(p);
1960  proc_getauxv(curthread, p, sb);
1961  PRELE(p);
1962  sbuf_finish(sb);
1963  sbuf_delete(sb);
1964  *sizep = size;
1965  } else {
1966  structsize = sizeof(Elf_Auxinfo);
1967  sbuf_bcat(sb, &structsize, sizeof(structsize));
1968  PHOLD(p);
1969  proc_getauxv(curthread, p, sb);
1970  PRELE(p);
1971  }
1972 }
1973 
1974 static boolean_t
1975 __elfN(parse_notes)(struct image_params *imgp, Elf_Brandnote *checknote,
1976  int32_t *osrel, const Elf_Phdr *pnote)
1977 {
1978  const Elf_Note *note, *note0, *note_end;
1979  const char *note_name;
1980  int i;
1981 
1982  if (pnote == NULL || pnote->p_offset > PAGE_SIZE ||
1983  pnote->p_filesz > PAGE_SIZE - pnote->p_offset)
1984  return (FALSE);
1985 
1986  note = note0 = (const Elf_Note *)(imgp->image_header + pnote->p_offset);
1987  note_end = (const Elf_Note *)(imgp->image_header +
1988  pnote->p_offset + pnote->p_filesz);
1989  for (i = 0; i < 100 && note >= note0 && note < note_end; i++) {
1990  if (!aligned(note, Elf32_Addr) || (const char *)note_end -
1991  (const char *)note < sizeof(Elf_Note))
1992  return (FALSE);
1993  if (note->n_namesz != checknote->hdr.n_namesz ||
1994  note->n_descsz != checknote->hdr.n_descsz ||
1995  note->n_type != checknote->hdr.n_type)
1996  goto nextnote;
1997  note_name = (const char *)(note + 1);
1998  if (note_name + checknote->hdr.n_namesz >=
1999  (const char *)note_end || strncmp(checknote->vendor,
2000  note_name, checknote->hdr.n_namesz) != 0)
2001  goto nextnote;
2002 
2003  /*
2004  * Fetch the osreldate for binary
2005  * from the ELF OSABI-note if necessary.
2006  */
2007  if ((checknote->flags & BN_TRANSLATE_OSREL) != 0 &&
2008  checknote->trans_osrel != NULL)
2009  return (checknote->trans_osrel(note, osrel));
2010  return (TRUE);
2011 
2012 nextnote:
2013  note = (const Elf_Note *)((const char *)(note + 1) +
2014  roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) +
2015  roundup2(note->n_descsz, ELF_NOTE_ROUNDSIZE));
2016  }
2017 
2018  return (FALSE);
2019 }
2020 
2021 /*
2022  * Try to find the appropriate ABI-note section for checknote,
2023  * fetch the osreldate for binary from the ELF OSABI-note. Only the
2024  * first page of the image is searched, the same as for headers.
2025  */
2026 static boolean_t
2027 __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote,
2028  int32_t *osrel)
2029 {
2030  const Elf_Phdr *phdr;
2031  const Elf_Ehdr *hdr;
2032  int i;
2033 
2034  hdr = (const Elf_Ehdr *)imgp->image_header;
2035  phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
2036 
2037  for (i = 0; i < hdr->e_phnum; i++) {
2038  if (phdr[i].p_type == PT_NOTE &&
2039  __elfN(parse_notes)(imgp, checknote, osrel, &phdr[i]))
2040  return (TRUE);
2041  }
2042  return (FALSE);
2043 
2044 }
2045 
2046 /*
2047  * Tell kern_execve.c about it, with a little help from the linker.
2048  */
2049 static struct execsw __elfN(execsw) = {
2050  __CONCAT(exec_, __elfN(imgact)),
2051  __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
2052 };
2054 
#ifdef COMPRESS_USER_CORES
/*
 * Compress and write out a core segment for a user process.
 *
 * 'inbuf' is the starting address of a VM segment in the process' address
 * space that is to be compressed and written out to the core file.  'dest_buf'
 * is a buffer in the kernel's address space.  The segment is copied from
 * 'inbuf' to 'dest_buf' first before being processed by the compression
 * routine gzwrite().  This copying is necessary because the content of the VM
 * segment may change between the compression pass and the crc-computation pass
 * in gzwrite().  This is because realtime threads may preempt the UNIX kernel.
 *
 * If inbuf is NULL it is assumed that data is already copied to 'dest_buf'.
 *
 * A chunk that cannot be copied in is written as zeroes, so that the core
 * file keeps the layout announced by its program headers.
 *
 * Returns 0 on success, or EFAULT when gzwrite() does not accept a whole
 * chunk.
 *
 * NOTE(review): 'len' is an unsigned int while core_output() passes a
 * size_t, so a length above UINT_MAX would be truncated here.
 */
static int
compress_core (gzFile file, char *inbuf, char *dest_buf, unsigned int len,
    struct thread *td)
{
	int len_compressed;
	int error = 0;
	unsigned int chunk_len;

	while (len) {
		if (inbuf != NULL) {
			chunk_len = (len > CORE_BUF_SIZE) ? CORE_BUF_SIZE : len;
			/*
			 * Do not let a failed copy write out whatever the
			 * previous chunk left in the buffer.
			 */
			if (copyin(inbuf, dest_buf, chunk_len) != 0)
				bzero(dest_buf, chunk_len);
			inbuf += chunk_len;
		} else {
			chunk_len = len;
		}
		len_compressed = gzwrite(file, dest_buf, chunk_len);

		EVENTHANDLER_INVOKE(app_coredump_progress, td, len_compressed);

		if ((unsigned int)len_compressed != chunk_len) {
			log(LOG_WARNING,
			    "compress_core: length mismatch (0x%x returned, "
			    "0x%x expected)\n", len_compressed, chunk_len);
			EVENTHANDLER_INVOKE(app_coredump_error, td,
			    "compress_core: length mismatch %x -> %x",
			    chunk_len, len_compressed);
			error = EFAULT;
			break;
		}
		len -= chunk_len;
		maybe_yield();
	}

	return (error);
}
#endif /* COMPRESS_USER_CORES */
2106 
2107 static vm_prot_t
2108 __elfN(trans_prot)(Elf_Word flags)
2109 {
2110  vm_prot_t prot;
2111 
2112  prot = 0;
2113  if (flags & PF_X)
2114  prot |= VM_PROT_EXECUTE;
2115  if (flags & PF_W)
2116  prot |= VM_PROT_WRITE;
2117  if (flags & PF_R)
2118  prot |= VM_PROT_READ;
2119 #if __ELF_WORD_SIZE == 32
2120 #if defined(__amd64__) || defined(__ia64__)
2121  if (i386_read_exec && (flags & PF_R))
2122  prot |= VM_PROT_EXECUTE;
2123 #endif
2124 #endif
2125  return (prot);
2126 }
2127 
2128 static Elf_Word
2129 __elfN(untrans_prot)(vm_prot_t prot)
2130 {
2131  Elf_Word flags;
2132 
2133  flags = 0;
2134  if (prot & VM_PROT_EXECUTE)
2135  flags |= PF_X;
2136  if (prot & VM_PROT_READ)
2137  flags |= PF_R;
2138  if (prot & VM_PROT_WRITE)
2139  flags |= PF_W;
2140  return (flags);
2141 }
rlim_t lim_max(struct proc *p, int which)
static boolean_t __elfN() bsdsunix_trans_osrel(const Elf_Note *note, int32_t *osrel)
Definition: imgact_elf.c:161
prfpregset_t elf_fpregset_t
Definition: imgact_elf.c:1634
static int __CONCAT(exec_, __elfN(imgact))
Definition: imgact_elf.c:96
static void note_procstat_umask(void *, struct sbuf *, size_t *)
Definition: imgact_elf.c:1864
int __elfN() coredump(struct thread *td, struct vnode *vp, off_t limit, int flags)
Definition: imgact_elf.c:1178
int exec_check_permissions(struct image_params *imgp)
Definition: kern_exec.c:1414
static int core_output(struct vnode *vp, void *base, size_t len, off_t offset, struct ucred *active_cred, struct ucred *file_cred, struct thread *td, char *core_buf, gzFile gzfile)
Definition: imgact_elf.c:1093
EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw))
struct buf * buf
Definition: vfs_bio.c:97
rlim_t lim_cur(struct proc *p, int which)
int racct_add(struct proc *p, int resource, uint64_t amount)
Definition: kern_racct.c:1208
static int __elfN() corehdr(struct thread *, struct vnode *, struct ucred *, int, void *, size_t, struct note_info_list *, size_t, gzFile)
Definition: imgact_elf.c:1411
char * path
int vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, void *base, size_t len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred, struct ucred *file_cred, size_t *aresid, struct thread *td)
Definition: vfs_vnops.c:474
TUNABLE_INT("kern.eventtimer.singlemul",&singlemul)
void NDFREE(struct nameidata *ndp, const u_int flags)
Definition: vfs_lookup.c:1091
static void __elfN() putnote(struct note_info *, struct sbuf *)
Definition: imgact_elf.c:1589
int snprintf(char *str, size_t size, const char *format,...)
Definition: subr_prf.c:509
static SYSCTL_NODE(_debug, OID_AUTO, cpufreq, CTLFLAG_RD, NULL,"cpufreq debugging")
struct ucred * file_cred
Definition: imgact_elf.c:1116
int racct_set(struct proc *p, int resource, uint64_t amount)
Definition: kern_racct.c:1227
void *** start
Definition: linker_if.m:86
struct thread * td
Definition: imgact_elf.c:1117
void * malloc(unsigned long size, struct malloc_type *mtp, int flags)
Definition: kern_malloc.c:454
thrmisc_t elf_thrmisc_t
Definition: imgact_elf.c:1636
u_long maxtsiz
Definition: subr_param.c:102
int __elfN() remove_brand_entry(Elf_Brandinfo *entry)
Definition: imgact_elf.c:226
CTASSERT(MAXSHELLCMDLEN >=MAXINTERP+3)
void panic(const char *fmt,...)
void exec_unmap_first_page(struct image_params *imgp)
Definition: kern_exec.c:1024
static void __elfN() note_procstat_auxv(void *, struct sbuf *, size_t *)
Definition: imgact_elf.c:1947
TAILQ_HEAD(note_info_list, note_info)
static void __elfN() prepare_notes(struct thread *, struct note_info_list *, size_t *)
Definition: imgact_elf.c:1447
void maybe_yield(void)
Definition: kern_synch.c:584
#define CORE_BUF_SIZE
static int __elfN() map_partial(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot)
Definition: imgact_elf.c:357
static int GNU_KBSDSUNIX_ABI_DESC
Definition: imgact_elf.c:173
#define OLD_EI_BRAND
Definition: imgact_elf.c:86
SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD,&boothowto, 0,"Boot control flags, passed from loader")
void(* outfunc_t)(void *, struct sbuf *, size_t *)
Definition: imgact_elf.c:1047
int uprintf(const char *fmt,...)
Definition: subr_prf.c:137
static int __elfN() check_header(const Elf_Ehdr *hdr)
Definition: imgact_elf.c:328
__FBSDID("$BSDSUniX$")
outfunc_t outfunc
Definition: imgact_elf.c:1051
Elf_Phdr * phdr
Definition: imgact_elf.c:1037
vm_offset_t elf_ps_strings_t
Definition: imgact_elf.c:1639
static boolean_t kbsdsunix_trans_osrel(const Elf_Note *note, int32_t *osrel)
Definition: imgact_elf.c:185
static void __elfN() puthdr(struct thread *, void *, size_t, int, size_t)
Definition: imgact_elf.c:1505
static void note_procstat_files(void *, struct sbuf *, size_t *)
Definition: imgact_elf.c:1789
struct ucred * active_cred
Definition: imgact_elf.c:1115
gregset_t elf_gregset_t
Definition: imgact_elf.c:1635
struct sx allproc_lock
Definition: kern_proc.c:136
static Elf_Word __elfN() untrans_prot(vm_prot_t prot)
Definition: imgact_elf.c:2129
ssize_t sbuf_end_section(struct sbuf *s, ssize_t old_len, size_t pad, int c)
Definition: subr_sbuf.c:807
static Elf_Brandinfo *__elfN() get_brandinfo(struct image_params *imgp, const char *interp, int interp_name_len, int32_t *osrel)
Definition: imgact_elf.c:260
void vput(struct vnode *vp)
Definition: vfs_subr.c:2428
static boolean_t __elfN() parse_notes(struct image_params *imgp, Elf_Brandnote *checknote, int32_t *osrel, const Elf_Phdr *pnote)
Definition: imgact_elf.c:1975
int ZEXPORT gzwrite(gzFile file, const voidp buf, unsigned len)
Definition: kern_gzio.c:213
static boolean_t __elfN() check_note(struct image_params *imgp, Elf_Brandnote *checknote, int32_t *osrel)
Definition: imgact_elf.c:2027
int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
static void __elfN() note_procstat_proc(void *, struct sbuf *, size_t *)
Definition: imgact_elf.c:1764
gzFile gz_open(char *path, const char *mode, struct vnode *vp) const
Definition: kern_gzio.c:88
Elf_Off offset
Definition: imgact_elf.c:1038
static void note_procstat_vmmap(void *, struct sbuf *, size_t *)
Definition: imgact_elf.c:1819
int exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
Definition: kern_exec.c:1045
void sbuf_set_drain(struct sbuf *s, sbuf_drain_func *func, void *ctx)
Definition: subr_sbuf.c:311
static size_t register_note(struct note_info_list *, int, outfunc_t, void *)
Definition: imgact_elf.c:1564
int kern_proc_out(struct proc *p, struct sbuf *sb, int flags)
Definition: kern_proc.c:1189
int namei(struct nameidata *ndp)
Definition: vfs_lookup.c:135
#define aligned(a, t)
int __elfN() insert_brand_entry(Elf_Brandinfo *entry)
Definition: imgact_elf.c:207
static const struct execsw ** execsw
Definition: kern_exec.c:187
static void each_writable_segment(struct thread *, segment_callback, void *)
Definition: imgact_elf.c:1345
static void cb_put_phdr(vm_map_entry_t, void *)
Definition: imgact_elf.c:1303
static int __elfN() load_section(struct vmspace *vmspace, vm_object_t object, vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot, size_t pagesize)
Definition: imgact_elf.c:460
prfpregset_t elf_prfpregset_t
Definition: imgact_elf.c:1633
static void __elfN() note_threadmd(void *, struct sbuf *, size_t *)
Definition: imgact_elf.c:1738
#define ELF_NOTE_ROUNDSIZE
Definition: imgact_elf.c:85
int __elfN() brand_inuse(Elf_Brandinfo *entry)
Definition: imgact_elf.c:242
void log(int level, const char *fmt,...)
Definition: subr_prf.c:289
void lim_rlimit(struct proc *p, int which, struct rlimit *rlp)
struct sbuf * sbuf_new(struct sbuf *s, char *buf, int length, int flags)
Definition: subr_sbuf.c:211
Elf_Brandnote __elfN(kbsdsunix_brandnote)
#define ELF_KERN_PROC_MASK
Definition: imgact_elf.c:1637
static void __elfN() note_prstatus(void *, struct sbuf *, size_t *)
Definition: imgact_elf.c:1669
int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
Definition: kern_proc.c:2148
static void __elfN() note_procstat_psstrings(void *, struct sbuf *, size_t *)
Definition: imgact_elf.c:1923
static void note_procstat_osrel(void *, struct sbuf *, size_t *)
Definition: imgact_elf.c:1905
void free(void *addr, struct malloc_type *mtp)
Definition: kern_malloc.c:554
int printf(const char *fmt,...)
Definition: subr_prf.c:367
#define suword
Definition: imgact_elf.c:978
static void __elfN() note_fpregset(void *, struct sbuf *, size_t *)
Definition: imgact_elf.c:1697
static int sbuf_drain_core_output(void *, const char *, int)
Definition: imgact_elf.c:1128
void sbuf_delete(struct sbuf *s)
Definition: subr_sbuf.c:753
static void note_procstat_groups(void *, struct sbuf *, size_t *)
Definition: imgact_elf.c:1845
static void cb_size_segment(vm_map_entry_t, void *)
Definition: imgact_elf.c:1329
static int __elfN() map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow)
Definition: imgact_elf.c:392
struct kinfo_proc elf_kinfo_proc_t
Definition: imgact_elf.c:1638
void sbuf_start_section(struct sbuf *s, ssize_t *old_lenp)
Definition: subr_sbuf.c:782
prpsinfo_t elf_prpsinfo_t
Definition: imgact_elf.c:1632
int __elfN() bsdsunix_fixup(register_t **stack_base, struct image_params *imgp)
Definition: imgact_elf.c:981
int sbuf_bcat(struct sbuf *s, const void *buf, size_t len)
Definition: subr_sbuf.c:389
int sbuf_finish(struct sbuf *s)
Definition: subr_sbuf.c:694
int mp_ncpus
Definition: subr_smp.c:63
int ZEXPORT gzclose(gzFile file)
Definition: kern_gzio.c:363
prstatus_t elf_prstatus_t
Definition: imgact_elf.c:1631
static vm_prot_t __elfN() trans_prot(Elf_Word flags)
Definition: imgact_elf.c:2108
static int __elfN() load_file(struct proc *p, const char *file, u_long *addr, u_long *entry, size_t pagesize)
Definition: imgact_elf.c:584
static void __elfN() note_thrmisc(void *, struct sbuf *, size_t *)
Definition: imgact_elf.c:1718
static void note_procstat_rlimit(void *, struct sbuf *, size_t *)
Definition: imgact_elf.c:1882
#define trunc_page_ps(va, ps)
#define round_page_ps(va, ps)
void * outarg
Definition: imgact_elf.c:1052
size_t outsize
Definition: imgact_elf.c:1053
static const char GNU_ABI_VENDOR[]
Definition: imgact_elf.c:172
struct vnode * vp
Definition: imgact_elf.c:1118
static int sbuf_drain_count(void *arg, const char *data, int len)
int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb)
Definition: kern_proc.c:1758
static void __elfN() note_prpsinfo(void *, struct sbuf *, size_t *)
Definition: imgact_elf.c:1643
void(* segment_callback)(vm_map_entry_t, void *)
Definition: imgact_elf.c:1033
#define __ELF_WORD_SIZE
Definition: imgact_elf32.c:30
int exec_map_first_page(struct image_params *imgp)
Definition: kern_exec.c:958