da/d46/kern__fork_8c_source.html

 /*-

  * Copyright (c) 1982, 1986, 1989, 1991, 1993

  *      The Regents of the University of California.  All rights reserved.

  * (c) UNIX System Laboratories, Inc.

  * All or some portions of this file are derived from material licensed

  * to the University of California by American Telephone and Telegraph

  * Co. or Unix System Laboratories, Inc. and are reproduced herein with

  * the permission of UNIX System Laboratories, Inc.

  *

  * Redistribution and use in source and binary forms, with or without

  * modification, are permitted provided that the following conditions

  * are met:

  * 1. Redistributions of source code must retain the above copyright

  *    notice, this list of conditions and the following disclaimer.

  * 2. Redistributions in binary form must reproduce the above copyright

  *    notice, this list of conditions and the following disclaimer in the

  *    documentation and/or other materials provided with the distribution.

  * 4. Neither the name of the University nor the names of its contributors

  *    may be used to endorse or promote products derived from this software

  *    without specific prior written permission.

  *

  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND

  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE

  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

  * SUCH DAMAGE.

  *

  *      @(#)kern_fork.c 8.6 (Berkeley) 4/8/94

  */


 #include <sys/cdefs.h>

 __FBSDID("$BSDSUniX$");


 #include "opt_kdtrace.h"

 #include "opt_ktrace.h"

 #include "opt_kstack_pages.h"

 #include "opt_procdesc.h"


 #include <sys/param.h>

 #include <sys/systm.h>

 #include <sys/sysproto.h>

 #include <sys/eventhandler.h>

 #include <sys/fcntl.h>

 #include <sys/filedesc.h>

 #include <sys/jail.h>

 #include <sys/kernel.h>

 #include <sys/kthread.h>

 #include <sys/sysctl.h>

 #include <sys/lock.h>

 #include <sys/malloc.h>

 #include <sys/mutex.h>

 #include <sys/priv.h>

 #include <sys/proc.h>

 #include <sys/procdesc.h>

 #include <sys/pioctl.h>

 #include <sys/ptrace.h>

 #include <sys/racct.h>

 #include <sys/resourcevar.h>

 #include <sys/sched.h>

 #include <sys/syscall.h>

 #include <sys/vmmeter.h>

 #include <sys/vnode.h>

 #include <sys/acct.h>

 #include <sys/ktr.h>

 #include <sys/ktrace.h>

 #include <sys/unistd.h>

 #include <sys/sdt.h>

 #include <sys/sx.h>

 #include <sys/sysent.h>

 #include <sys/signalvar.h>


 #include <security/audit/audit.h>

 #include <security/mac/mac_framework.h>


 #include <vm/vm.h>

 #include <vm/pmap.h>

 #include <vm/vm_map.h>

 #include <vm/vm_extern.h>

 #include <vm/uma.h>


 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 /*
  * Optional fork hook for the DTrace fasttrap provider.  do_fork() calls it
  * with (parent, child) when it is non-NULL and RFMEM is not set.  Nothing
  * in this file assigns it; presumably the fasttrap module installs it --
  * TODO confirm.
  */
 dtrace_fork_func_t      dtrace_fasttrap_fork;
 #endif

 /*
  * Static probe "create" in the "proc" provider, "kernel" module.  do_fork()
  * fires it with (child proc, parent proc, fork flags), matching the three
  * argument types declared here.
  */
 SDT_PROVIDER_DECLARE(proc);
 SDT_PROBE_DEFINE3(proc, kernel, , create, "struct proc *",
     "struct proc *", "int");


 /*
  * Fallback definition of the fork(2) argument structure, used only when
  * _SYS_SYSPROTO_H_ is not defined.  sys_fork() never reads its argument
  * structure, so a single placeholder member is enough.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fork_args {
         int     dummy;          /* placeholder; not read by sys_fork() */
 };
 #endif


 /*
  * fork system call entry point.
  *
  * Asks fork1() for a new process (RFPROC) with its own copy of the file
  * descriptor table (RFFDG).  On success the child's pid is stored in
  * td_retval[0] and td_retval[1] is cleared; on failure the error from
  * fork1() is returned and td_retval is left untouched.  'uap' is unused.
  */
 /* ARGSUSED */
 int
 sys_fork(struct thread *td, struct fork_args *uap)
 {
         struct proc *child;
         int rv;

         rv = fork1(td, RFFDG | RFPROC, 0, &child, NULL, 0);
         if (rv != 0)
                 return (rv);

         td->td_retval[0] = child->p_pid;
         td->td_retval[1] = 0;
         return (0);
 }


 /*
  * pdfork system call entry point.
  *
  * When the kernel is built with PROCDESC, creates a new process as for
  * fork (RFFDG | RFPROC) and additionally requests a process descriptor
  * (RFPROCDESC).  uap->flags is handed to fork1() as the process descriptor
  * flags.  On success the child's pid is stored in td_retval[0],
  * td_retval[1] is cleared, and the new descriptor number is copied out to
  * uap->fdp.
  *
  * Returns 0 on success, the error from fork1(), or the error from
  * copyout().  Note that a copyout() failure is reported even though the
  * child has already been created.  Without PROCDESC, always returns ENOSYS.
  */
 /* ARGSUSED */
 int
 sys_pdfork(struct thread *td, struct pdfork_args *uap)
 {
 #ifdef PROCDESC
         int error, fd;
         struct proc *p2;

         /*
          * It is necessary to return fd by reference because 0 is a valid file
          * descriptor number, and the child needs to be able to distinguish
          * itself from the parent using the return value.
          */
         error = fork1(td, RFFDG | RFPROC | RFPROCDESC, 0, &p2,
             &fd, uap->flags);
         if (error == 0) {
                 td->td_retval[0] = p2->p_pid;
                 td->td_retval[1] = 0;
                 error = copyout(&fd, uap->fdp, sizeof(fd));
         }
         return (error);
 #else
         return (ENOSYS);
 #endif
 }


 /*
  * vfork system call entry point.
  *
  * Calls fork1() with RFFDG | RFPROC | RFPPWAIT | RFMEM; when built with
  * XEN, only RFFDG | RFPROC is used.  On success the child's pid is stored
  * in td_retval[0] and td_retval[1] is cleared; otherwise the error from
  * fork1() is returned.  'uap' is unused.
  */
 /* ARGSUSED */
 int
 sys_vfork(struct thread *td, struct vfork_args *uap)
 {
         struct proc *child;
         int rfflags, rv;

 #ifdef XEN
         rfflags = RFFDG | RFPROC; /* validate that this is still an issue */
 #else
         rfflags = RFFDG | RFPROC | RFPPWAIT | RFMEM;
 #endif
         rv = fork1(td, rfflags, 0, &child, NULL, 0);
         if (rv != 0)
                 return (rv);

         td->td_retval[0] = child->p_pid;
         td->td_retval[1] = 0;
         return (0);
 }


 /*
  * rfork system call entry point.
  *
  * Rejects any request carrying RFKERNELONLY bits with EINVAL, records the
  * flags for audit, and passes them to fork1() unchanged.  On success
  * td_retval[0] receives the child's pid, or 0 when fork1() produced no new
  * process (it sets *procp to NULL when RFPROC is clear); td_retval[1] is
  * cleared.
  */
 int
 sys_rfork(struct thread *td, struct rfork_args *uap)
 {
         struct proc *child;
         int rv;

         /* Kernel-only flags may not be requested from userland. */
         if ((uap->flags & RFKERNELONLY) != 0)
                 return (EINVAL);

         AUDIT_ARG_FFLAGS(uap->flags);
         rv = fork1(td, uap->flags, 0, &child, NULL, 0);
         if (rv != 0)
                 return (rv);

         if (child != NULL)
                 td->td_retval[0] = child->p_pid;
         else
                 td->td_retval[0] = 0;
         td->td_retval[1] = 0;
         return (0);
 }


 /*
  * nprocs:  current number of processes.  Starts at 1 to account for
  *          process 0; do_fork() increments it and fork1() compares it
  *          against maxproc.
  * lastpid: most recently allocated pid.  fork_findpid() starts its search
  *          at lastpid + 1 and stores the chosen pid back here unless
  *          RFHIGHPID is set.  Exported read-only through the sysctl below.
  */
 int     nprocs = 1;             /* process 0 */
 int     lastpid = 0;
 SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0,
     "Last used PID");


 /*
  * Random component of lastpid generation.  We mix in a random factor to
  * make the next pid a little harder to predict.  The modulus is sanity
  * checked when it is set (see sysctl_kern_randompid()) so that the check
  * does not have to be done in critical paths.  Don't let it be too small or
  * we pointlessly waste entropy, and don't let it be impossibly large: a
  * modulus that is too big causes a LOT more process table scans and slows
  * down fork processing, because the pidchecked caching is defeated.
  *
  * A value of 0 disables randomization: fork_findpid() only adds a random
  * offset when randompid is non-zero.
  */
 static int randompid = 0;


 /*
  * Sysctl handler for the random pid modulus (randompid).
  *
  * Reports the current modulus and, when a new value is supplied,
  * normalizes it before storing:
  *   - negative or greater than pid_max - 100  -> pid_max - 100
  *   - 0 or 1                                  -> 0
  *   - 2 .. 99                                 -> 100
  *   - anything else                           -> stored as given
  * The read/modify/write of randompid is done under an exclusive
  * allproc_lock.
  *
  * Returns 0 on success or the error from sysctl_wire_old_buffer() /
  * sysctl_handle_int().
  */
 static int
 sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
 {
         int modulus, rv;

         rv = sysctl_wire_old_buffer(req, sizeof(int));
         if (rv != 0)
                 return (rv);

         sx_xlock(&allproc_lock);
         modulus = randompid;
         rv = sysctl_handle_int(oidp, &modulus, 0, req);
         if (rv == 0 && req->newptr != NULL) {
                 if (modulus < 0 || modulus > pid_max - 100) {
                         /* Out of range: clamp to the largest value. */
                         modulus = pid_max - 100;
                 } else if (modulus < 2) {
                         /* Too small to have any effect: treat as off. */
                         modulus = 0;
                 } else if (modulus < 100) {
                         /* Raise small values to a reasonable minimum. */
                         modulus = 100;
                 }
                 randompid = modulus;
         }
         sx_xunlock(&allproc_lock);

         return (rv);
 }

 SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
     0, 0, sysctl_kern_randompid, "I", "Random PID modulus");


 /*
  * Pick a process ID for a new process.
  *
  * flags: the RF* fork flags; only RFHIGHPID is examined here.
  *
  * Returns a pid that is not currently used as a pid, process group id or
  * session id by any process on the allproc or zombproc lists.
  *
  * Side effects: unless RFHIGHPID is set, lastpid is updated to the returned
  * value.  The function-static 'pidchecked' caches the lowest in-use id
  * found above the candidate, so candidates in (lastpid, pidchecked) can be
  * accepted without rescanning the process lists; it is reset to 0 when the
  * candidate wraps around and after every RFHIGHPID allocation.
  *
  * Locking: the caller must hold allproc_lock and proctree_lock (asserted).
  */
 static int
 fork_findpid(int flags)
 {
         struct proc *p;
         int trypid;
         static int pidchecked = 0;

         /*
          * Requires allproc_lock in order to iterate over the list
          * of processes, and proctree_lock to access p_pgrp.
          */
         sx_assert(&allproc_lock, SX_LOCKED);
         sx_assert(&proctree_lock, SX_LOCKED);

         /*
          * Find an unused process ID.  We remember a range of unused IDs
          * ready to use (from lastpid+1 through pidchecked-1).
          *
          * If RFHIGHPID is set (used during system boot), do not allocate
          * low-numbered pids.
          */
         trypid = lastpid + 1;
         if (flags & RFHIGHPID) {
                 if (trypid < 10)
                         trypid = 10;
         } else {
                 /* Optional random skip; disabled when randompid is 0. */
                 if (randompid)
                         trypid += arc4random() % randompid;
         }
 retry:
         /*
          * If the process ID prototype has wrapped around,
          * restart somewhat above 0, as the low-numbered procs
          * tend to include daemons that don't exit.
          */
         if (trypid >= pid_max) {
                 trypid = trypid % pid_max;
                 if (trypid < 100)
                         trypid += 100;
                 /* The cached free range is meaningless after a wrap. */
                 pidchecked = 0;
         }
         if (trypid >= pidchecked) {
                 int doingzomb = 0;

                 pidchecked = PID_MAX;
                 /*
                  * Scan the active and zombie procs to check whether this pid
                  * is in use.  Remember the lowest pid that's greater
                  * than trypid, so we can avoid checking for a while.
                  */
                 p = LIST_FIRST(&allproc);
 again:
                 for (; p != NULL; p = LIST_NEXT(p, p_list)) {
                         /*
                          * While the candidate collides with this process's
                          * pid, process group id or session id, advance it.
                          * If that runs into the cached bound, restart the
                          * whole search from 'retry'.
                          */
                         while (p->p_pid == trypid ||
                             (p->p_pgrp != NULL &&
                             (p->p_pgrp->pg_id == trypid ||
                             (p->p_session != NULL &&
                             p->p_session->s_sid == trypid)))) {
                                 trypid++;
                                 if (trypid >= pidchecked)
                                         goto retry;
                         }
                         /* Lower pidchecked to the smallest id above trypid. */
                         if (p->p_pid > trypid && pidchecked > p->p_pid)
                                 pidchecked = p->p_pid;
                         if (p->p_pgrp != NULL) {
                                 if (p->p_pgrp->pg_id > trypid &&
                                     pidchecked > p->p_pgrp->pg_id)
                                         pidchecked = p->p_pgrp->pg_id;
                                 if (p->p_session != NULL &&
                                     p->p_session->s_sid > trypid &&
                                     pidchecked > p->p_session->s_sid)
                                         pidchecked = p->p_session->s_sid;
                         }
                 }
                 /* Second pass: repeat the same scan over the zombie list. */
                 if (!doingzomb) {
                         doingzomb = 1;
                         p = LIST_FIRST(&zombproc);
                         goto again;
                 }
         }

         /*
          * RFHIGHPID does not mess with the lastpid counter during boot.
          */
         if (flags & RFHIGHPID)
                 pidchecked = 0;
         else
                 lastpid = trypid;

         return (trypid);
 }


 /*
  * Handle a fork request that does not create a new process (RFPROC clear):
  * the calling process only detaches parts of its own state.
  *
  * td:    the calling thread.
  * flags: RF* flags; RFPROC must not be set (asserted).
  *
  * If the process has had threads, is not a system process, and a file
  * descriptor operation (RFCFDG or RFFDG) is requested, the process is
  * single-threaded at a boundary for the duration of the operation;
  * ERESTART is returned if that cannot be done.  vm_forkproc() is then
  * called with no child, and on success the descriptor table is replaced by
  * a freshly initialized one (RFCFDG) and/or unshared (RFFDG).
  *
  * Returns 0 on success, ERESTART, or the error from vm_forkproc().
  */
 static int
 fork_norfproc(struct thread *td, int flags)
 {
         struct filedesc *fresh_fd;
         struct proc *self;
         int rv;

         KASSERT((flags & RFPROC) == 0,
             ("fork_norfproc called with RFPROC set"));
         self = td->td_proc;

         if ((flags & (RFCFDG | RFFDG)) != 0 &&
             (self->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) {
                 PROC_LOCK(self);
                 if (thread_single(SINGLE_BOUNDARY)) {
                         PROC_UNLOCK(self);
                         return (ERESTART);
                 }
                 PROC_UNLOCK(self);
         }

         rv = vm_forkproc(td, NULL, NULL, NULL, flags);
         if (rv == 0) {
                 /* RFCFDG: replace the descriptor table with a new one. */
                 if (flags & RFCFDG) {
                         fresh_fd = fdinit(td->td_proc->p_fd);
                         fdfree(td);
                         self->p_fd = fresh_fd;
                 }

                 /* RFFDG: unshare the descriptor table. */
                 if (flags & RFFDG)
                         fdunshare(self, td);
         }

         /* Leave single-threading mode if it was entered above. */
         if ((flags & (RFCFDG | RFFDG)) != 0 &&
             (self->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) {
                 PROC_LOCK(self);
                 thread_single_end();
                 PROC_UNLOCK(self);
         }
         return (rv);
 }


 /*
  * Second half of fork1(): turn the pre-allocated process p2 (with initial
  * thread td2) into a child of the calling thread's process.
  *
  * td:      the forking thread; its process is the parent (p1).
  * flags:   RF* fork flags.
  * p2/td2:  new process and its first thread, supplied by the caller.
  * vm2:     address space argument handed to vm_forkproc().
  * pdflags: process descriptor flags, passed to procdesc_new() when
  *          RFPROCDESC is set and the kernel is built with PROCDESC.
  *
  * Locking: on entry proctree_lock must be held shared and allproc_lock
  * exclusive (both asserted).  Both are released inside this function;
  * proctree_lock is later re-acquired exclusively and released again.
  *
  * The function has no failure path.  Before returning it sleeps on
  * p2->p_dbgwait for as long as TDB_STOPATFORK is set on td2.
  */
 static void
 do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2,
     struct vmspace *vm2, int pdflags)
 {
         struct proc *p1, *pptr;
         int p2_held, trypid;
         struct filedesc *fd;
         struct filedesc_to_leader *fdtol;
         struct sigacts *newsigacts;

         sx_assert(&proctree_lock, SX_SLOCKED);
         sx_assert(&allproc_lock, SX_XLOCKED);

         p2_held = 0;
         p1 = td->td_proc;

         /*
          * Increment the nprocs resource before blocking can occur.  There
          * are hard-limits as to the number of processes that can run.
          */
         nprocs++;

         trypid = fork_findpid(flags);

         sx_sunlock(&proctree_lock);

         /*
          * Publish the child on the global lists and in the pid hash while
          * it is still marked PRS_NEW.
          */
         p2->p_state = PRS_NEW;          /* protect against others */
         p2->p_pid = trypid;
         AUDIT_ARG_PID(p2->p_pid);
         LIST_INSERT_HEAD(&allproc, p2, p_list);
         LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
         tidhash_add(td2);
         PROC_LOCK(p2);
         PROC_LOCK(p1);

         sx_xunlock(&allproc_lock);

         /*
          * Copy the p_startcopy..p_endcopy section of struct proc from the
          * parent, then clear the p_startzero..p_endzero section.
          */
         bcopy(&p1->p_startcopy, &p2->p_startcopy,
             __rangeof(struct proc, p_startcopy, p_endcopy));
         pargs_hold(p2->p_args);
         PROC_UNLOCK(p1);

         bzero(&p2->p_startzero,
             __rangeof(struct proc, p_startzero, p_endzero));
         p2->p_treeflag = 0;

         p2->p_ucred = crhold(td->td_ucred);

         /* Tell the prison that we exist. */
         prison_proc_hold(p2->p_ucred->cr_prison);

         PROC_UNLOCK(p2);

         /*
          * Malloc things while we don't hold any locks.
          */
         if (flags & RFSIGSHARE)
                 newsigacts = NULL;
         else
                 newsigacts = sigacts_alloc();

         /*
          * Copy filedesc.
          */
         if (flags & RFCFDG) {
                 /* Child gets a newly initialized descriptor table. */
                 fd = fdinit(p1->p_fd);
                 fdtol = NULL;
         } else if (flags & RFFDG) {
                 /* Child gets its own copy of the parent's table. */
                 fd = fdcopy(p1->p_fd);
                 fdtol = NULL;
         } else {
                 /* Child shares the parent's table. */
                 fd = fdshare(p1->p_fd);
                 if (p1->p_fdtol == NULL)
                         p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL,
                             p1->p_leader);
                 if ((flags & RFTHREAD) != 0) {
                         /*
                          * Shared file descriptor table, and shared
                          * process leaders.
                          */
                         fdtol = p1->p_fdtol;
                         FILEDESC_XLOCK(p1->p_fd);
                         fdtol->fdl_refcount++;
                         FILEDESC_XUNLOCK(p1->p_fd);
                 } else {
                         /*
                          * Shared file descriptor table, and different
                          * process leaders.
                          */
                         fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
                             p1->p_fd, p2);
                 }
         }
         /*
          * Initialize the new thread.  Start by zeroing the section of
          * struct thread that is zero-initialized, then copy the section
          * that is copied directly from the parent thread.
          */

         PROC_LOCK(p2);
         PROC_LOCK(p1);

         bzero(&td2->td_startzero,
             __rangeof(struct thread, td_startzero, td_endzero));

         bcopy(&td->td_startcopy, &td2->td_startcopy,
             __rangeof(struct thread, td_startcopy, td_endcopy));

         /* The thread name starts out as the process command name. */
         bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name));
         td2->td_sigstk = td->td_sigstk;
         td2->td_sigmask = td->td_sigmask;
         td2->td_flags = TDF_INMEM;
         td2->td_lend_user_pri = PRI_MAX;
         td2->td_dbg_sc_code = td->td_dbg_sc_code;
         td2->td_dbg_sc_narg = td->td_dbg_sc_narg;

 #ifdef VIMAGE
         td2->td_vnet = NULL;
         td2->td_vnet_lpush = NULL;
 #endif

         /*
          * Allow the scheduler to initialize the child.
          */
         thread_lock(td);
         sched_fork(td, td2);
         thread_unlock(td);

         /*
          * Duplicate sub-structures as needed.
          * Increase reference counts on shared objects.
          */
         p2->p_flag = P_INMEM;
         p2->p_flag2 = 0;
         p2->p_swtick = ticks;
         if (p1->p_flag & P_PROFIL)
                 startprofclock(p2);
         td2->td_ucred = crhold(p2->p_ucred);

         if (flags & RFSIGSHARE) {
                 p2->p_sigacts = sigacts_hold(p1->p_sigacts);
         } else {
                 sigacts_copy(newsigacts, p1->p_sigacts);
                 p2->p_sigacts = newsigacts;
         }

         /* Select the value recorded in the child's p_sigparent. */
         if (flags & RFTSIGZMB)
                 p2->p_sigparent = RFTSIGNUM(flags);
         else if (flags & RFLINUXTHPN)
                 p2->p_sigparent = SIGUSR1;
         else
                 p2->p_sigparent = SIGCHLD;

         p2->p_textvp = p1->p_textvp;
         p2->p_fd = fd;
         p2->p_fdtol = fdtol;

         if (p1->p_flag2 & P2_INHERIT_PROTECTED) {
                 p2->p_flag |= P_PROTECTED;
                 p2->p_flag2 |= P2_INHERIT_PROTECTED;
         }

         /*
          * p_limit is copy-on-write.  Bump its refcount.
          */
         lim_fork(p1, p2);

         pstats_fork(p1->p_stats, p2->p_stats);

         PROC_UNLOCK(p1);
         PROC_UNLOCK(p2);

         /* Bump references to the text vnode (for procfs). */
         if (p2->p_textvp)
                 vref(p2->p_textvp);

         /*
          * Set up linkage for kernel based threading.
          */
         if ((flags & RFTHREAD) != 0) {
                 mtx_lock(&ppeers_lock);
                 p2->p_peers = p1->p_peers;
                 p1->p_peers = p2;
                 p2->p_leader = p1->p_leader;
                 mtx_unlock(&ppeers_lock);
                 PROC_LOCK(p1->p_leader);
                 if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
                         PROC_UNLOCK(p1->p_leader);
                         /*
                          * The task leader is exiting, so process p1 is
                          * going to be killed shortly.  Since p1 obviously
                          * isn't dead yet, we know that the leader is either
                          * sending SIGKILL's to all the processes in this
                          * task or is sleeping waiting for all the peers to
                          * exit.  We let p1 complete the fork, but we need
                          * to go ahead and kill the new process p2 since
                          * the task leader may not get a chance to send
                          * SIGKILL to it.  We leave it on the list so that
                          * the task leader will wait for this new process
                          * to commit suicide.
                          */
                         PROC_LOCK(p2);
                         kern_psignal(p2, SIGKILL);
                         PROC_UNLOCK(p2);
                 } else
                         PROC_UNLOCK(p1->p_leader);
         } else {
                 /* Not part of a task: the child is its own leader. */
                 p2->p_peers = NULL;
                 p2->p_leader = p2;
         }

         sx_xlock(&proctree_lock);
         PGRP_LOCK(p1->p_pgrp);
         PROC_LOCK(p2);
         PROC_LOCK(p1);

         /*
          * Preserve some more flags in subprocess.  P_PROFIL has already
          * been preserved.
          */
         p2->p_flag |= p1->p_flag & P_SUGID;
         td2->td_pflags |= td->td_pflags & TDP_ALTSTACK;
         SESS_LOCK(p1->p_session);
         if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
                 p2->p_flag |= P_CONTROLT;
         SESS_UNLOCK(p1->p_session);
         if (flags & RFPPWAIT)
                 p2->p_flag |= P_PPWAIT;

         /* The child joins the parent's process group, right after it. */
         p2->p_pgrp = p1->p_pgrp;
         LIST_INSERT_AFTER(p1, p2, p_pglist);
         PGRP_UNLOCK(p1->p_pgrp);
         LIST_INIT(&p2->p_children);
         LIST_INIT(&p2->p_orphans);

         callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0);

         /*
          * If PF_FORK is set, the child process inherits the
          * procfs ioctl flags from its parent.
          */
         if (p1->p_pfsflags & PF_FORK) {
                 p2->p_stops = p1->p_stops;
                 p2->p_pfsflags = p1->p_pfsflags;
         }

         /*
          * This begins the section where we must prevent the parent
          * from being swapped.
          */
         _PHOLD(p1);
         PROC_UNLOCK(p1);

         /*
          * Attach the new process to its parent.
          *
          * If RFNOWAIT is set, the newly created process becomes a child
          * of init.  This effectively disassociates the child from the
          * parent.
          */
         if (flags & RFNOWAIT)
                 pptr = initproc;
         else
                 pptr = p1;
         p2->p_pptr = pptr;
         LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
         sx_xunlock(&proctree_lock);

         /* Inform accounting that we have forked. */
         p2->p_acflag = AFORK;
         PROC_UNLOCK(p2);

 #ifdef KTRACE
         ktrprocfork(p1, p2);
 #endif

         /*
          * Finish creating the child process.  It will return via a different
          * execution path later.  (ie: directly into user mode)
          *
          * NOTE(review): the return value of vm_forkproc() is not checked
          * here, although fork_norfproc() does check it.
          */
         vm_forkproc(td, p2, td2, vm2, flags);

         /*
          * Statistics: classify the operation by its exact flag set.  Only
          * the flag combinations used by sys_fork() and by the non-XEN
          * sys_vfork() count as fork/vfork; otherwise forks by proc0 count
          * as kthreads and everything else as rfork.
          */
         if (flags == (RFFDG | RFPROC)) {
                 PCPU_INC(cnt.v_forks);
                 PCPU_ADD(cnt.v_forkpages, p2->p_vmspace->vm_dsize +
                     p2->p_vmspace->vm_ssize);
         } else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
                 PCPU_INC(cnt.v_vforks);
                 PCPU_ADD(cnt.v_vforkpages, p2->p_vmspace->vm_dsize +
                     p2->p_vmspace->vm_ssize);
         } else if (p1 == &proc0) {
                 PCPU_INC(cnt.v_kthreads);
                 PCPU_ADD(cnt.v_kthreadpages, p2->p_vmspace->vm_dsize +
                     p2->p_vmspace->vm_ssize);
         } else {
                 PCPU_INC(cnt.v_rforks);
                 PCPU_ADD(cnt.v_rforkpages, p2->p_vmspace->vm_dsize +
                     p2->p_vmspace->vm_ssize);
         }

 #ifdef PROCDESC
         /*
          * Associate the process descriptor with the process before anything
          * can happen that might cause that process to need the descriptor.
          * However, don't do this until after fork(2) can no longer fail.
          */
         if (flags & RFPROCDESC)
                 procdesc_new(p2, pdflags);
 #endif

         /*
          * Both processes are set up, now check if any loadable modules want
          * to adjust anything.
          */
         EVENTHANDLER_INVOKE(process_fork, p1, p2, flags);

         /*
          * Set the child start time and mark the process as being complete.
          */
         PROC_LOCK(p2);
         PROC_LOCK(p1);
         microuptime(&p2->p_stats->p_start);
         PROC_SLOCK(p2);
         p2->p_state = PRS_NORMAL;
         PROC_SUNLOCK(p2);

 #ifdef KDTRACE_HOOKS
         /*
          * Tell the DTrace fasttrap provider about the new process so that any
          * tracepoints inherited from the parent can be removed. We have to do
          * this only after p_state is PRS_NORMAL since the fasttrap module will
          * use pfind() later on.
          */
         if ((flags & RFMEM) == 0 && dtrace_fasttrap_fork)
                 dtrace_fasttrap_fork(p1, p2);
 #endif
         if ((p1->p_flag & (P_TRACED | P_FOLLOWFORK)) == (P_TRACED |
             P_FOLLOWFORK)) {
                 /*
                  * Arrange for debugger to receive the fork event.
                  *
                  * We could report PL_FLAG_FORKED regardless of the
                  * P_FOLLOWFORK setting, but it does not make sense
                  * for a runaway child.
                  */
                 td->td_dbgflags |= TDB_FORK;
                 td->td_dbg_forked = p2->p_pid;
                 td2->td_dbgflags |= TDB_STOPATFORK;
                 /* Hold p2 until the wait at the end of this function. */
                 _PHOLD(p2);
                 p2_held = 1;
         }
         if (flags & RFPPWAIT) {
                 /* Record on the parent thread which child RFPPWAIT refers to. */
                 td->td_pflags |= TDP_RFPPWAIT;
                 td->td_rfppwait_p = p2;
         }
         PROC_UNLOCK(p2);
         if ((flags & RFSTOPPED) == 0) {
                 /*
                  * If RFSTOPPED not requested, make child runnable and
                  * add to run queue.
                  */
                 thread_lock(td2);
                 TD_SET_CAN_RUN(td2);
                 sched_add(td2, SRQ_BORING);
                 thread_unlock(td2);
         }

         /*
          * Now can be swapped.
          */
         _PRELE(p1);
         PROC_UNLOCK(p1);

         /*
          * Tell any interested parties about the new process.
          */
         knote_fork(&p1->p_klist, p2->p_pid);
         SDT_PROBE3(proc, kernel, , create, p2, p1, flags);

         /*
          * Wait until debugger is attached to child.  The loop sleeps on
          * p_dbgwait until TDB_STOPATFORK has been cleared from td2; the
          * hold taken above, if any, is dropped afterwards.
          */
         PROC_LOCK(p2);
         while ((td2->td_dbgflags & TDB_STOPATFORK) != 0)
                 cv_wait(&p2->p_dbgwait, &p2->p_mtx);
         if (p2_held)
                 _PRELE(p2);
         PROC_UNLOCK(p2);
 }


 /*
  * Common implementation behind fork, vfork, rfork and pdfork.
  *
  * td:        the calling thread; its process is the parent.
  * flags:     RF* flags describing what to create and what to share.
  * pages:     kernel stack size in pages for the new thread; 0 selects
  *            KSTACK_PAGES.
  * procp:     out parameter.  Receives the new process on success, or NULL
  *            when RFPROC is clear and no process is created.
  * procdescp: out parameter for the process descriptor's file descriptor
  *            number.  Must be non-NULL when RFPROCDESC is set; otherwise
  *            unused.
  * pdflags:   process descriptor flags, passed through to do_fork().
  *
  * Returns 0 on success.  Errors:
  *   EINVAL  invalid or inconsistent flags, or RFPROCDESC without RFPROC
  *           or without procdescp;
  *   ENOMEM  thread, kernel stack, address space or swap reservation
  *           failure;
  *   EAGAIN  process limits or resource accounting refused the fork;
  *   other   whatever falloc() or fork_norfproc() returned.
  *
  * On any failure after the new proc structure has been allocated, all
  * resources acquired so far are released (including the descriptor
  * reserved for RFPROCDESC) and the caller is paused for hz / 2 ticks
  * before the error is returned.
  */
 int
 fork1(struct thread *td, int flags, int pages, struct proc **procp,
     int *procdescp, int pdflags)
 {
         struct proc *p1;
         struct proc *newproc;
         int ok;
         struct thread *td2;
         struct vmspace *vm2;
         vm_ooffset_t mem_charged;
         int error;
         static int curfail;
         static struct timeval lastfail;
 #ifdef PROCDESC
         struct file *fp_procdesc = NULL;
 #endif

         /* Check for the undefined or unimplemented flags. */
         if ((flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0)
                 return (EINVAL);

         /* Signal value requires RFTSIGZMB. */
         if ((flags & RFTSIGFLAGS(RFTSIGMASK)) != 0 && (flags & RFTSIGZMB) == 0)
                 return (EINVAL);

         /* Can't copy and clear. */
         if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
                 return (EINVAL);

         /* Check the validity of the signal number. */
         if ((flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(flags) > _SIG_MAXSIG)
                 return (EINVAL);

 #ifdef PROCDESC
         if ((flags & RFPROCDESC) != 0) {
                 /* A process descriptor requires that a process be created. */
                 if ((flags & RFPROC) == 0)
                         return (EINVAL);

                 /* Must provide a place to put a procdesc if creating one. */
                 if (procdescp == NULL)
                         return (EINVAL);
         }
 #endif

         p1 = td->td_proc;

         /*
          * Here we don't create a new process, but we divorce
          * certain parts of a process from itself.
          */
         if ((flags & RFPROC) == 0) {
                 *procp = NULL;
                 return (fork_norfproc(td, flags));
         }

 #ifdef PROCDESC
         /*
          * If required, create a process descriptor in the parent first; we
          * will abandon it if something goes wrong. We don't finit() until
          * later.
          */
         if (flags & RFPROCDESC) {
                 error = falloc(td, &fp_procdesc, procdescp, 0);
                 if (error != 0)
                         return (error);
         }
 #endif

         mem_charged = 0;
         vm2 = NULL;
         if (pages == 0)
                 pages = KSTACK_PAGES;
         /* Allocate new proc. */
         newproc = uma_zalloc(proc_zone, M_WAITOK);
         td2 = FIRST_THREAD_IN_PROC(newproc);
         if (td2 == NULL) {
                 /* No thread attached to this proc yet: allocate one. */
                 td2 = thread_alloc(pages);
                 if (td2 == NULL) {
                         error = ENOMEM;
                         goto fail1;
                 }
                 proc_linkup(newproc, td2);
         } else {
                 /*
                  * The proc came with a thread.  Keep its kernel stack only
                  * if it has one of the requested size.
                  */
                 if (td2->td_kstack == 0 || td2->td_kstack_pages != pages) {
                         if (td2->td_kstack != 0)
                                 vm_thread_dispose(td2);
                         if (!thread_alloc_stack(td2, pages)) {
                                 error = ENOMEM;
                                 goto fail1;
                         }
                 }
         }

         if ((flags & RFMEM) == 0) {
                 vm2 = vmspace_fork(p1->p_vmspace, &mem_charged);
                 if (vm2 == NULL) {
                         error = ENOMEM;
                         goto fail1;
                 }
                 if (!swap_reserve(mem_charged)) {
                         /*
                          * The swap reservation failed. The accounting
                          * from the entries of the copied vm2 will be
                          * subtracted in vmspace_free(), so force the
                          * reservation there.
                          */
                         swap_reserve_force(mem_charged);
                         error = ENOMEM;
                         goto fail1;
                 }
         } else
                 vm2 = NULL;

         /*
          * XXX: This is ugly; when we copy resource usage, we need to bump
          *      per-cred resource counters.
          */
         newproc->p_ucred = p1->p_ucred;

         /*
          * Initialize resource accounting for the child process.
          */
         error = racct_proc_fork(p1, newproc);
         if (error != 0) {
                 error = EAGAIN;
                 goto fail1;
         }

 #ifdef MAC
         mac_proc_init(newproc);
 #endif
         knlist_init_mtx(&newproc->p_klist, &newproc->p_mtx);
         STAILQ_INIT(&newproc->p_ktr);

         /* We have to lock the process tree while we look for a pid. */
         sx_slock(&proctree_lock);

         /*
          * Although process entries are dynamically created, we still keep
          * a global limit on the maximum number we will create.  Don't allow
          * a nonprivileged user to use the last ten processes; don't let root
          * exceed the limit. The variable nprocs is the current number of
          * processes, maxproc is the limit.
          */
         sx_xlock(&allproc_lock);
         if ((nprocs >= maxproc - 10 && priv_check_cred(td->td_ucred,
             PRIV_MAXPROC, 0) != 0) || nprocs >= maxproc) {
                 error = EAGAIN;
                 goto fail;
         }

         /*
          * Increment the count of procs running with this uid. Don't allow
          * a nonprivileged user to exceed their current limit.
          *
          * XXXRW: Can we avoid privilege here if it's not needed?
          */
         error = priv_check_cred(td->td_ucred, PRIV_PROC_LIMIT, 0);
         if (error == 0)
                 ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 0);
         else {
                 PROC_LOCK(p1);
                 ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1,
                     lim_cur(p1, RLIMIT_NPROC));
                 PROC_UNLOCK(p1);
         }
         if (ok) {
                 /* do_fork() releases both proctree_lock and allproc_lock. */
                 do_fork(td, flags, newproc, td2, vm2, pdflags);

                 /*
                  * Return child proc pointer to parent.
                  */
                 *procp = newproc;
 #ifdef PROCDESC
                 if (flags & RFPROCDESC)
                         procdesc_finit(newproc->p_procdesc, fp_procdesc);
 #endif
                 racct_proc_fork_done(newproc);
                 return (0);
         }

         error = EAGAIN;
 fail:
         sx_sunlock(&proctree_lock);
         if (ppsratecheck(&lastfail, &curfail, 1))
                 printf("maxproc limit exceeded by uid %u (pid %d); see tuning(7) and login.conf(5)\n",
                     td->td_ucred->cr_ruid, p1->p_pid);
         sx_xunlock(&allproc_lock);
 #ifdef MAC
         mac_proc_destroy(newproc);
 #endif
         racct_proc_exit(newproc);
 fail1:
         if (vm2 != NULL)
                 vmspace_free(vm2);
         uma_zfree(proc_zone, newproc);
 #ifdef PROCDESC
         if (((flags & RFPROCDESC) != 0) && (fp_procdesc != NULL)) {
                 /*
                  * falloc() both installed the file in the parent's
                  * descriptor table and handed us a reference.  Remove the
                  * table entry first, then drop our own reference;
                  * otherwise the reserved descriptor slot would leak in the
                  * parent on every failed fork.
                  */
                 fdclose(td->td_proc->p_fd, fp_procdesc, *procdescp, td);
                 fdrop(fp_procdesc, td);
         }
 #endif
         /* Throttle callers that keep failing to fork. */
         pause("fork", hz / 2);
         return (error);
 }


 /*

  * Handle the return of a child process from fork1().  This function

  * is called from the MD fork_trampoline() entry point.

  */

 void

 fork_exit(void (*callout)(void *, struct trapframe *), void *arg,

     struct trapframe *frame)

 {

         struct proc *p;

         struct thread *td;

         struct thread *dtd;


         td = curthread;

         p = td->td_proc;

         KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new"));


         CTR4(KTR_PROC, "fork_exit: new thread %p (td_sched %p, pid %d, %s)",

                 td, td->td_sched, p->p_pid, td->td_name);


         sched_fork_exit(td);

         /*

         * Processes normally resume in mi_switch() after being

         * cpu_switch()'ed to, but when children start up they arrive here

         * instead, so we must do much the same things as mi_switch() would.

         */

         if ((dtd = PCPU_GET(deadthread))) {

                 PCPU_SET(deadthread, NULL);

                 thread_stash(dtd);

         }

         thread_unlock(td);


         /*

          * cpu_set_fork_handler intercepts this function call to

          * have this call a non-return function to stay in kernel mode.

          * initproc has its own fork handler, but it does return.

          */

         KASSERT(callout != NULL, ("NULL callout in fork_exit"));

         callout(arg, frame);


         /*

          * Check if a kernel thread misbehaved and returned from its main

          * function.

          */

         if (p->p_flag & P_KTHREAD) {

                 printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n",

                     td->td_name, p->p_pid);

                 kthread_exit();

         }

         mtx_assert(&Giant, MA_NOTOWNED);


         if (p->p_sysent->sv_schedtail != NULL)

                 (p->p_sysent->sv_schedtail)(td);

 }


 /*
  * Simplified back end of syscall(), used when returning from fork()
  * directly into user mode.  Giant is not held on entry, and must not
  * be held on return.  This function is passed in to fork_exit() as the
  * first parameter (its callout) and is called when returning to a new
  * userland process.
  *
  * td is the new thread and frame its trap frame; both are handed on to
  * userret() unchanged.
  *
  * Before returning to user mode one of two debugger cases is handled:
  *
  *  - TDB_STOPATFORK is set on td: if the parent still has both P_TRACED
  *    and P_FOLLOWFORK set, the new process is marked traced, reparented
  *    and stopped with SIGSTOP; otherwise the request is cleared and
  *    sleepers on p_dbgwait are woken.
  *  - Otherwise, if the process is already P_TRACED: a system call exit
  *    event is reported for the new thread.
  */
 void
 fork_return(struct thread *td, struct trapframe *frame)
 {
         struct proc *p, *dbg;

         p = td->td_proc;
         if (td->td_dbgflags & TDB_STOPATFORK) {
                 /*
                  * proctree_lock is taken exclusively before the proc
                  * lock; it covers the p_pptr accesses and the reparenting
                  * below.
                  */
                 sx_xlock(&proctree_lock);
                 PROC_LOCK(p);
                 if ((p->p_pptr->p_flag & (P_TRACED | P_FOLLOWFORK)) ==
                     (P_TRACED | P_FOLLOWFORK)) {
                         /*
                          * If debugger still wants auto-attach for the
                          * parent's children, do it now.
                          *
                          * The new parent is the parent's own parent;
                          * NOTE(review): presumably that is the debugger,
                          * because the traced parent was itself reparented
                          * to it -- confirm.
                          */
                         dbg = p->p_pptr->p_pptr;
                         p->p_flag |= P_TRACED;
                         /* Remember the real parent's pid before reparenting. */
                         p->p_oppid = p->p_pptr->p_pid;
                         CTR2(KTR_PTRACE,
                     "fork_return: attaching to new child pid %d: oppid %d",
                             p->p_pid, p->p_oppid);
                         proc_reparent(p, dbg);
                         sx_xunlock(&proctree_lock);
                         /*
                          * TDB_CHILD and TDB_SCX are set only for the
                          * duration of the stop.  The proc lock is still
                          * held when ptracestop() is called.
                          */
                         td->td_dbgflags |= TDB_CHILD | TDB_SCX;
                         ptracestop(td, SIGSTOP);
                         td->td_dbgflags &= ~(TDB_CHILD | TDB_SCX);
                 } else {
                         /*
                          * ... otherwise clear the request.
                          *
                          * NOTE(review): the broadcast presumably wakes
                          * whoever is waiting for the stop-at-fork request
                          * to be resolved -- confirm against the
                          * p_dbgwait waiters.
                          */
                         sx_xunlock(&proctree_lock);
                         td->td_dbgflags &= ~TDB_STOPATFORK;
                         cv_broadcast(&p->p_dbgwait);
                 }
                 /* Both branches above leave the proc lock held. */
                 PROC_UNLOCK(p);
         } else if (p->p_flag & P_TRACED) {
                 /*
                  * This is the start of a new thread in a traced
                  * process.  Report a system call exit event.
                  *
                  * TDB_SCX is set only while the event is delivered;
                  * ptracestop() is called only when S_PT_SCX is set in
                  * p_stops.
                  */
                 PROC_LOCK(p);
                 td->td_dbgflags |= TDB_SCX;
                 _STOPEVENT(p, S_SCX, td->td_dbg_sc_code);
                 if ((p->p_stops & S_PT_SCX) != 0)
                         ptracestop(td, SIGTRAP);
                 td->td_dbgflags &= ~TDB_SCX;
                 PROC_UNLOCK(p);
         }

         userret(td, frame);

 #ifdef KTRACE
         /*
          * When KTR_SYSRET tracing is enabled for td, log the return as
          * a SYS_fork system call return; the other two arguments are 0.
          */
         if (KTRPOINT(td, KTR_SYSRET))
                 ktrsysret(SYS_fork, 0, 0);
 #endif
         /* Enforce the "Giant not held on return" contract stated above. */
         mtx_assert(&Giant, MA_NOTOWNED);
 }

allproc
struct proclist allproc
Definition: kern_proc.c:134

fork_args
Definition: kern_fork.c:97

sigacts_copy
void sigacts_copy(struct sigacts *dest, struct sigacts *src)
Definition: kern_sig.c:3485

userret
void userret(struct thread *td, struct trapframe *frame)
Definition: subr_trap.c:101

fork_norfproc
static int fork_norfproc(struct thread *td, int flags)
Definition: kern_fork.c:319

sigacts_alloc
struct sigacts * sigacts_alloc(void)
Definition: kern_sig.c:3452

fd
int fd
Definition: kern_exec.c:199

nprocs
int nprocs
Definition: kern_fork.c:184

thread_single_end
void thread_single_end(void)
Definition: kern_thread.c:958

__FBSDID
__FBSDID("$BSDSUniX$")

lim_cur
rlim_t lim_cur(struct proc *p, int which)
Definition: kern_resource.c:1180

pid_max
pid_t pid_max
Definition: subr_param.c:97

fork_findpid
static int fork_findpid(int flags)
Definition: kern_fork.c:227

priv_check_cred
int priv_check_cred(struct ucred *cred, int priv, int flags)
Definition: kern_priv.c:76

ppsratecheck
int ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
Definition: kern_time.c:948

pstats_fork
void pstats_fork(struct pstats *src, struct pstats *dst)
Definition: kern_proc.c:1040

filedesc_to_leader_alloc
struct filedesc_to_leader * filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader)
Definition: kern_descrip.c:2882

sched_fork_exit
void sched_fork_exit(struct thread *td)
Definition: sched_4bsd.c:1685

proc0
struct proc proc0
Definition: init_main.c:99

prison_proc_hold
void prison_proc_hold(struct prison *pr)
Definition: kern_jail.c:2641

ppeers_lock
struct mtx ppeers_lock
Definition: kern_proc.c:138

proc_linkup
void proc_linkup(struct proc *p, struct thread *td)
Definition: kern_thread.c:252

racct_proc_fork
int racct_proc_fork(struct proc *parent, struct proc *child)
Definition: kern_racct.c:1273

initproc
struct proc * initproc
Definition: init_main.c:102

maxproc
int maxproc
Definition: subr_param.c:87

falloc
int falloc(struct thread *td, struct file **resultfp, int *resultfd, int flags)
Definition: kern_descrip.c:1666

kern_psignal
void kern_psignal(struct proc *p, int sig)
Definition: kern_sig.c:1975

proc_zone
uma_zone_t proc_zone
Definition: kern_proc.c:139

fork1
int fork1(struct thread *td, int flags, int pages, struct proc **procp, int *procdescp, int pdflags)
Definition: kern_fork.c:759

SYSCTL_PROC
SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_kern_randompid,"I","Random PID modulus")

vref
void vref(struct vnode *vp)
Definition: vfs_subr.c:2302

sched_add
void sched_add(struct thread *td, int flags)
Definition: sched_4bsd.c:1258

startprofclock
void startprofclock(struct proc *p)
Definition: kern_clock.c:652

thread_alloc
struct thread * thread_alloc(int pages)
Definition: kern_thread.c:342

fdunshare
void fdunshare(struct proc *p, struct thread *td)
Definition: kern_descrip.c:1839

allproc_lock
struct sx allproc_lock
Definition: kern_proc.c:136

sys_pdfork
int sys_pdfork(struct thread *td, struct pdfork_args *uap)
Definition: kern_fork.c:119

fork_args::dummy
int dummy
Definition: kern_fork.c:98

racct_proc_fork_done
void racct_proc_fork_done(struct proc *child)
Definition: kern_racct.c:1280

fork_return
void fork_return(struct thread *td, struct trapframe *frame)
Definition: kern_fork.c:1024

fdinit
struct filedesc * fdinit(struct filedesc *fdp)
Definition: kern_descrip.c:1755

Giant
struct mtx Giant
Definition: kern_mutex.c:140

SDT_PROVIDER_DECLARE
SDT_PROVIDER_DECLARE(proc)

sched_fork
void sched_fork(struct thread *td, struct thread *childtd)
Definition: sched_4bsd.c:786

lim_fork
void lim_fork(struct proc *p1, struct proc *p2)
Definition: kern_resource.c:1131

chgproccnt
int chgproccnt(struct uidinfo *uip, int diff, rlim_t max)
Definition: kern_resource.c:1357

sysctl_kern_randompid
static int sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
Definition: kern_fork.c:200

thread_alloc_stack
int thread_alloc_stack(struct thread *td, int pages)
Definition: kern_thread.c:359

ptracestop
int ptracestop(struct thread *td, int sig)
Definition: kern_sig.c:2434

sysctl_handle_int
int sysctl_handle_int(SYSCTL_HANDLER_ARGS)
Definition: kern_sysctl.c:986

tidhash_add
void tidhash_add(struct thread *td)
Definition: kern_thread.c:1043

thread_single
int thread_single(int mode)
Definition: kern_thread.c:616

sys_fork
int sys_fork(struct thread *td, struct fork_args *uap)
Definition: kern_fork.c:104

crhold
struct ucred * crhold(struct ucred *cr)
Definition: kern_prot.c:1824

zombproc
struct proclist zombproc
Definition: kern_proc.c:135

pause
int pause(const char *wmesg, int timo)
Definition: kern_synch.c:350

randompid
static int randompid
Definition: kern_fork.c:197

printf
int printf(const char *fmt,...)
Definition: subr_prf.c:367

fdfree
void fdfree(struct thread *td)
Definition: kern_descrip.c:1908

do_fork
static void do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2, struct vmspace *vm2, int pdflags)
Definition: kern_fork.c:369

fork_exit
void fork_exit(void(*callout)(void *, struct trapframe *), void *arg, struct trapframe *frame)
Definition: kern_fork.c:968

sysctl_wire_old_buffer
int sysctl_wire_old_buffer(struct sysctl_req *req, size_t len)
Definition: kern_sysctl.c:1364

SDT_PROBE_DEFINE3
SDT_PROBE_DEFINE3(proc, kernel,, create,"struct proc *","struct proc *","int")

sys_rfork
int sys_rfork(struct thread *td, struct rfork_args *uap)
Definition: kern_fork.c:166

fdcopy
struct filedesc * fdcopy(struct filedesc *fdp)
Definition: kern_descrip.c:1859

racct_proc_exit
void racct_proc_exit(struct proc *p)
Definition: kern_racct.c:1285

sigacts_hold
struct sigacts * sigacts_hold(struct sigacts *ps)
Definition: kern_sig.c:3476

thread_stash
void thread_stash(struct thread *td)
Definition: kern_thread.c:304

ticks
volatile int ticks
Definition: kern_clock.c:387

microuptime
void microuptime(struct timeval *tvp)
Definition: kern_tc.c:194

knote_fork
void knote_fork(struct knlist *list, int pid)
Definition: kern_event.c:452

proc_reparent
void proc_reparent(struct proc *child, struct proc *parent)
Definition: kern_exit.c:1306

proctree_lock
struct sx proctree_lock
Definition: kern_proc.c:137

SYSCTL_INT
SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD,&lastpid, 0,"Last used PID")

sys_vfork
int sys_vfork(struct thread *td, struct vfork_args *uap)
Definition: kern_fork.c:147

kthread_exit
void kthread_exit(void)
Definition: kern_kthread.c:322

knlist_init_mtx
void knlist_init_mtx(struct knlist *knl, struct mtx *lock)
Definition: kern_event.c:1995

pargs_hold
void pargs_hold(struct pargs *pa)
Definition: kern_proc.c:1455

fdshare
struct filedesc * fdshare(struct filedesc *fdp)
Definition: kern_descrip.c:1826

hz
int hz
Definition: subr_param.c:84

lastpid
int lastpid
Definition: kern_fork.c:185