103 #include <sys/cdefs.h>
106 #include "opt_inet.h"
107 #include "opt_inet6.h"
108 #include "opt_zero.h"
109 #include "opt_compat.h"
111 #include <sys/param.h>
112 #include <sys/systm.h>
113 #include <sys/fcntl.h>
114 #include <sys/limits.h>
115 #include <sys/lock.h>
117 #include <sys/malloc.h>
118 #include <sys/mbuf.h>
119 #include <sys/mutex.h>
120 #include <sys/domain.h>
121 #include <sys/file.h>
122 #include <sys/kernel.h>
123 #include <sys/event.h>
124 #include <sys/eventhandler.h>
125 #include <sys/poll.h>
126 #include <sys/proc.h>
127 #include <sys/protosw.h>
128 #include <sys/socket.h>
129 #include <sys/socketvar.h>
130 #include <sys/resourcevar.h>
131 #include <net/route.h>
132 #include <sys/signalvar.h>
133 #include <sys/stat.h>
135 #include <sys/sysctl.h>
137 #include <sys/jail.h>
138 #include <sys/syslog.h>
139 #include <netinet/in.h>
141 #include <net/vnet.h>
143 #include <security/mac/mac_framework.h>
148 #include <sys/mount.h>
149 #include <sys/sysent.h>
150 #include <compat/compat32bit/compat32bit.h>
/*
 * VNET_SO_ASSERT(so): assert, via VNET_ASSERT, that curvnet is non-NULL.
 * The failure message reports the enclosing function, the line number and
 * the socket pointer passed as 'so'.
 *
 * NOTE(review): the expansion itself ends in ';', so a call site written
 * as "VNET_SO_ASSERT(so);" produces an extra empty statement; be careful
 * when using it as the sole body of an unbraced if/else.
 */
183 #define VNET_SO_ASSERT(so) \
184 VNET_ASSERT(curvnet != NULL, \
185 ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so)));
201 if (error || !req->newptr )
204 if (val < 1 || val > USHRT_MAX)
212 "Maximum listen socket pending connection accept queue size");
/*
 * Read-only (CTLFLAG_RD) integer sysctl "numopensockets" under the
 * _kern_ipc node, backed directly by the numopensockets variable.
 */
215 SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
216 &numopensockets, 0,
"Number of open sockets");
218 #ifdef ZERO_COPY_SOCKETS
/*
 * Zero-copy socket switches, present only when ZERO_COPY_SOCKETS is
 * defined.  Both default to 1 (enabled) and have external linkage.
 * so_zero_copy_send is tested on the send copy-in path and
 * so_zero_copy_receive on the receive paths of this file.
 */
220 int so_zero_copy_send = 1;
221 int so_zero_copy_receive = 1;
/* Read-only parent node "zero_copy" under _kern_ipc for the knobs below. */
222 SYSCTL_NODE(_kern_ipc, OID_AUTO, zero_copy, CTLFLAG_RD, 0,
223 "Zero copy controls");
/* Read-write integer sysctl "receive", backed by so_zero_copy_receive. */
224 SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, receive, CTLFLAG_RW,
225 &so_zero_copy_receive, 0,
"Enable zero copy receive");
/* Read-write integer sysctl "send", backed by so_zero_copy_send. */
226 SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, send, CTLFLAG_RW,
227 &so_zero_copy_send, 0,
"Enable zero copy send");
/*
 * Declare the "ipc" sysctl node under _kern with the fixed identifier
 * KERN_IPC; the _kern_ipc sysctls in this file use it as their parent.
 */
248 SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0,
"IPC");
261 uma_zone_set_max(socket_zone, maxsockets);
268 socket_zone = uma_zcreate(
"socket",
sizeof(
struct socket), NULL, NULL,
269 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
270 uma_zone_set_max(socket_zone, maxsockets);
272 EVENTHANDLER_PRI_FIRST);
284 TUNABLE_INT_FETCH(
"kern.ipc.maxsockets", &maxsockets);
285 maxsockets = imax(maxsockets,
maxfiles);
296 int error, newmaxsockets;
300 if (error == 0 && req->newptr) {
301 if (newmaxsockets > maxsockets &&
303 maxsockets = newmaxsockets;
304 EVENTHANDLER_INVOKE(maxsockets_change);
/*
 * Read-write integer sysctl "maxsockets" under _kern_ipc, serviced by a
 * handler procedure (SYSCTL_PROC) rather than a plain variable.
 * The description string spells "available" correctly.
 */
310 SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW,
312 "Maximum number of sockets available");
328 static struct socket *
333 so = uma_zalloc(socket_zone, M_NOWAIT | M_ZERO);
337 if (mac_socket_init(so, M_NOWAIT) != 0) {
338 uma_zfree(socket_zone, so);
342 SOCKBUF_LOCK_INIT(&so->so_snd,
"so_snd");
343 SOCKBUF_LOCK_INIT(&so->so_rcv,
"so_rcv");
344 sx_init(&so->so_snd.sb_sx,
"so_snd_sx");
345 sx_init(&so->so_rcv.sb_sx,
"so_rcv_sx");
346 TAILQ_INIT(&so->so_aiojobq);
351 VNET_ASSERT(vnet != NULL, (
"%s:%d vnet is NULL, so=%p",
352 __func__, __LINE__, so));
353 vnet->vnet_sockcnt++;
369 KASSERT(so->so_count == 0, (
"sodealloc(): so_count %d", so->so_count));
370 KASSERT(so->so_pcb == NULL, (
"sodealloc(): so_pcb != NULL"));
376 VNET_ASSERT(so->so_vnet != NULL, (
"%s:%d so_vnet is NULL, so=%p",
377 __func__, __LINE__, so));
378 so->so_vnet->vnet_sockcnt--;
381 if (so->so_rcv.sb_hiwat)
383 &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
384 if (so->so_snd.sb_hiwat)
386 &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
389 if (so->so_accf != NULL)
393 mac_socket_destroy(so);
398 SOCKBUF_LOCK_DESTROY(&so->so_snd);
399 SOCKBUF_LOCK_DESTROY(&so->so_rcv);
400 uma_zfree(socket_zone, so);
409 struct ucred *cred,
struct thread *td)
420 if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL ||
422 return (EPROTONOSUPPORT);
425 return (EPROTONOSUPPORT);
427 if (prp->pr_type != type)
429 so =
soalloc(CRED_TO_VNET(cred));
433 TAILQ_INIT(&so->so_incomp);
434 TAILQ_INIT(&so->so_comp);
436 so->so_cred =
crhold(cred);
437 if ((prp->pr_domain->dom_family == PF_INET) ||
438 (prp->pr_domain->dom_family == PF_INET6) ||
439 (prp->pr_domain->dom_family == PF_ROUTE))
440 so->so_fibnum = td->td_proc->p_fibnum;
445 mac_socket_create(cred, so);
454 CURVNET_SET(so->so_vnet);
455 error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
458 KASSERT(so->so_count == 1, (
"socreate: so_count %d",
/*
 * Regression-test switch, default 1 (enabled).  When non-zero, the
 * listen-queue overflow condition is acted on by the early limit test in
 * the new-connection path.  Exported read-write as the
 * "sonewconn_earlytest" integer sysctl under the _regression node.
 *
 * The sysctl must be given the address of the variable
 * (&regression_sonewconn_earlytest); a mis-decoded "&reg" entity had
 * turned that expression into an invalid token.
 */
469 static int regression_sonewconn_earlytest = 1;
470 SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW,
471 &regression_sonewconn_earlytest, 0,
"Perform early sonewconn limit test");
486 static struct timeval lastover;
487 static struct timeval overinterval = { 60, 0 };
488 static int overcount;
494 over = (head->so_qlen > 3 * head->so_qlimit / 2);
497 if (regression_sonewconn_earlytest && over) {
503 if (
ratecheck(&lastover, &overinterval)) {
504 log(LOG_DEBUG,
"%s: pcb %p: Listen queue overflow: "
505 "%i already in queue awaiting acceptance "
506 "(%d occurrences)\n",
507 __func__, head->so_pcb, head->so_qlen, overcount);
514 VNET_ASSERT(head->so_vnet != NULL, (
"%s:%d so_vnet is NULL, head=%p",
515 __func__, __LINE__, head));
518 log(LOG_DEBUG,
"%s: pcb %p: New socket allocation failure: "
519 "limit reached or out of memory\n",
520 __func__, head->so_pcb);
523 if ((head->so_options & SO_ACCEPTFILTER) != 0)
526 so->so_type = head->so_type;
527 so->so_options = head->so_options &~ SO_ACCEPTCONN;
528 so->so_linger = head->so_linger;
529 so->so_state = head->so_state | SS_NOFDREF;
530 so->so_fibnum = head->so_fibnum;
531 so->so_proto = head->so_proto;
532 so->so_cred =
crhold(head->so_cred);
534 mac_socket_newconn(head, so);
539 if (
soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
541 log(LOG_DEBUG,
"%s: pcb %p: soreserve() failed\n",
542 __func__, head->so_pcb);
545 if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
547 log(LOG_DEBUG,
"%s: pcb %p: pru_attach() failed\n",
548 __func__, head->so_pcb);
551 so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
552 so->so_snd.sb_lowat = head->so_snd.sb_lowat;
553 so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
554 so->so_snd.sb_timeo = head->so_snd.sb_timeo;
555 so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
556 so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
557 so->so_state |= connstatus;
565 if (!(head->so_options & SO_ACCEPTCONN) &&
566 ((head->so_proto->pr_protocol != IPPROTO_SCTP) ||
567 (head->so_type != SOCK_SEQPACKET))) {
574 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
575 so->so_qstate |= SQ_COMP;
585 while (head->so_incqlen > head->so_qlimit) {
587 sp = TAILQ_FIRST(&head->so_incomp);
588 TAILQ_REMOVE(&head->so_incomp, sp, so_list);
590 sp->so_qstate &= ~SQ_INCOMP;
596 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
597 so->so_qstate |= SQ_INCOMP;
609 sobind(
struct socket *so,
struct sockaddr *nam,
struct thread *td)
613 CURVNET_SET(so->so_vnet);
614 error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td);
632 solisten(
struct socket *so,
int backlog,
struct thread *td)
636 CURVNET_SET(so->so_vnet);
637 error = (*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td);
646 SOCK_LOCK_ASSERT(so);
648 if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
658 SOCK_LOCK_ASSERT(so);
662 so->so_qlimit = backlog;
663 so->so_options |= SO_ACCEPTCONN;
689 struct protosw *
pr = so->so_proto;
692 ACCEPT_LOCK_ASSERT();
693 SOCK_LOCK_ASSERT(so);
695 if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 ||
696 (so->so_state & SS_PROTOREF) || (so->so_qstate & SQ_COMP)) {
704 KASSERT((so->so_qstate & SQ_COMP) != 0 ||
705 (so->so_qstate & SQ_INCOMP) != 0,
706 (
"sofree: so_head != NULL, but neither SQ_COMP nor "
708 KASSERT((so->so_qstate & SQ_COMP) == 0 ||
709 (so->so_qstate & SQ_INCOMP) == 0,
710 (
"sofree: so->so_qstate is SQ_COMP and also SQ_INCOMP"));
711 TAILQ_REMOVE(&head->so_incomp, so, so_list);
713 so->so_qstate &= ~SQ_INCOMP;
716 KASSERT((so->so_qstate & SQ_COMP) == 0 &&
717 (so->so_qstate & SQ_INCOMP) == 0,
718 (
"sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)",
719 so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP));
720 if (so->so_options & SO_ACCEPTCONN) {
721 KASSERT((TAILQ_EMPTY(&so->so_comp)), (
"sofree: so_comp populated"));
722 KASSERT((TAILQ_EMPTY(&so->so_incomp)), (
"sofree: so_incomp populated"));
728 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
729 (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
730 if (pr->pr_usrreqs->pru_detach != NULL)
731 (*pr->pr_usrreqs->pru_detach)(so);
769 KASSERT(!(so->so_state & SS_NOFDREF), (
"soclose: SS_NOFDREF on enter"));
771 CURVNET_SET(so->so_vnet);
773 if (so->so_state & SS_ISCONNECTED) {
774 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
777 if (error == ENOTCONN)
782 if (so->so_options & SO_LINGER) {
783 if ((so->so_state & SS_ISDISCONNECTING) &&
784 (so->so_state & SS_NBIO))
786 while (so->so_state & SS_ISCONNECTED) {
787 error = tsleep(&so->so_timeo,
788 PSOCK | PCATCH,
"soclos", so->so_linger *
hz);
796 if (so->so_proto->pr_usrreqs->pru_close != NULL)
797 (*so->so_proto->pr_usrreqs->pru_close)(so);
799 if (so->so_options & SO_ACCEPTCONN) {
805 so->so_options &= ~SO_ACCEPTCONN;
806 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
807 TAILQ_REMOVE(&so->so_incomp, sp, so_list);
809 sp->so_qstate &= ~SQ_INCOMP;
815 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
816 TAILQ_REMOVE(&so->so_comp, sp, so_list);
818 sp->so_qstate &= ~SQ_COMP;
824 KASSERT((TAILQ_EMPTY(&so->so_comp)),
825 (
"%s: so_comp populated", __func__));
826 KASSERT((TAILQ_EMPTY(&so->so_incomp)),
827 (
"%s: so_incomp populated", __func__));
830 KASSERT((so->so_state & SS_NOFDREF) == 0, (
"soclose: NOFDREF"));
831 so->so_state |= SS_NOFDREF;
/*
 * Entry invariants for soabort(): the socket has no remaining references
 * (so_count == 0), no protocol reference (SS_PROTOREF clear), no file
 * descriptor reference (SS_NOFDREF set), and is on neither the completed
 * nor the incomplete listen queue.
 *
 * The queue-membership bits SQ_COMP and SQ_INCOMP are kept in so_qstate
 * (that is where the rest of this file sets, clears and tests them), so
 * the last two assertions must examine so_qstate; testing so_state would
 * compare them against unrelated SS_* bits.
 */
861 KASSERT(so->so_count == 0, (
"soabort: so_count"));
862 KASSERT((so->so_state & SS_PROTOREF) == 0, (
"soabort: SS_PROTOREF"));
863 KASSERT(so->so_state & SS_NOFDREF, (
"soabort: !SS_NOFDREF"));
864 KASSERT((so->so_qstate & SQ_COMP) == 0, (
"soabort: SQ_COMP"));
865 KASSERT((so->so_qstate & SQ_INCOMP) == 0, (
"soabort: SQ_INCOMP"));
868 if (so->so_proto->pr_usrreqs->pru_abort != NULL)
869 (*so->so_proto->pr_usrreqs->pru_abort)(so);
881 KASSERT((so->so_state & SS_NOFDREF) != 0, (
"soaccept: !NOFDREF"));
882 so->so_state &= ~SS_NOFDREF;
885 CURVNET_SET(so->so_vnet);
886 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
892 soconnect(
struct socket *so,
struct sockaddr *nam,
struct thread *td)
896 if (so->so_options & SO_ACCEPTCONN)
899 CURVNET_SET(so->so_vnet);
905 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
906 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
915 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td);
927 CURVNET_SET(so1->so_vnet);
928 error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
938 if ((so->so_state & SS_ISCONNECTED) == 0)
940 if (so->so_state & SS_ISDISCONNECTING)
943 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
947 #ifdef ZERO_COPY_SOCKETS
948 struct so_zerocopy_stats{
953 struct so_zerocopy_stats so_zerocp_stats = {0,0,0};
971 sosend_copyin(
struct uio *uio,
struct mbuf **retmp,
int atomic,
long *space,
974 struct mbuf *m, **mp, *top;
983 resid = uio->uio_resid;
987 if (resid >= MINCLSIZE) {
989 m = m_gethdr(M_WAITOK, MT_DATA);
991 m->m_pkthdr.rcvif = NULL;
993 m = m_get(M_WAITOK, MT_DATA);
994 if (so_zero_copy_send &&
995 resid >= PAGE_SIZE &&
996 *space >= PAGE_SIZE &&
997 uio->uio_iov->iov_len >= PAGE_SIZE) {
998 so_zerocp_stats.size_ok++;
999 so_zerocp_stats.align_ok++;
1004 m_clget(m, M_WAITOK);
1005 len = min(min(MCLBYTES, resid), *space);
1009 m = m_gethdr(M_WAIT, MT_DATA);
1010 m->m_pkthdr.len = 0;
1011 m->m_pkthdr.rcvif = NULL;
1013 len = min(min(MHLEN, resid), *space);
1018 if (atomic && m && len < MHLEN)
1021 m = m_get(M_WAIT, MT_DATA);
1022 len = min(min(MLEN, resid), *space);
1034 error =
uiomove(mtod(m,
void *), (
int)len, uio);
1035 resid = uio->uio_resid;
1038 top->m_pkthdr.len += len;
1043 if (flags & MSG_EOR)
1044 top->m_flags |= M_EOR;
1047 }
while (*space > 0 && atomic);
/*
 * SBLOCKWAIT(f): translate message flags into a sockbuf-lock wait flag.
 * Yields 0 when MSG_DONTWAIT is set in f, otherwise SBL_WAIT (a flag this
 * file passes to sblock()).
 */
1054 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
1058 struct mbuf *top,
struct mbuf *control,
int flags,
struct thread *td)
1062 int clen = 0, error, dontroute;
1063 #ifdef ZERO_COPY_SOCKETS
1064 int atomic = sosendallatonce(so) || top;
1067 KASSERT(so->so_type == SOCK_DGRAM, (
"sodgram_send: !SOCK_DGRAM"));
1068 KASSERT(so->so_proto->pr_flags & PR_ATOMIC,
1069 (
"sodgram_send: !PR_ATOMIC"));
1072 resid = uio->uio_resid;
1074 resid = top->m_pkthdr.len;
1088 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0;
1090 td->td_ru.ru_msgsnd++;
1091 if (control != NULL)
1092 clen = control->m_len;
1094 SOCKBUF_LOCK(&so->so_snd);
1095 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1096 SOCKBUF_UNLOCK(&so->so_snd);
1101 error = so->so_error;
1103 SOCKBUF_UNLOCK(&so->so_snd);
1106 if ((so->so_state & SS_ISCONNECTED) == 0) {
1112 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1113 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
1114 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
1115 !(resid == 0 && clen != 0)) {
1116 SOCKBUF_UNLOCK(&so->so_snd);
1120 }
else if (addr == NULL) {
1121 if (so->so_proto->pr_flags & PR_CONNREQUIRED)
1124 error = EDESTADDRREQ;
1125 SOCKBUF_UNLOCK(&so->so_snd);
1134 space = sbspace(&so->so_snd);
1135 if (flags & MSG_OOB)
1138 SOCKBUF_UNLOCK(&so->so_snd);
1139 if (resid > space) {
1145 if (flags & MSG_EOR)
1146 top->m_flags |= M_EOR;
1148 #ifdef ZERO_COPY_SOCKETS
1149 error = sosend_copyin(uio, &top, atomic, &space, flags);
1159 (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0)));
1164 space -= resid - uio->uio_resid;
1166 resid = uio->uio_resid;
1168 KASSERT(resid == 0, (
"sosend_dgram: resid != 0"));
1175 so->so_options |= SO_DONTROUTE;
1187 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
1188 (flags & MSG_OOB) ? PRUS_OOB :
1193 ((flags & MSG_EOF) &&
1194 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
1198 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
1199 top, addr, control, td);
1202 so->so_options &= ~SO_DONTROUTE;
1211 if (control != NULL)
1231 struct mbuf *top,
struct mbuf *control,
int flags,
struct thread *td)
1235 int clen = 0, error, dontroute;
1236 int atomic = sosendallatonce(so) || top;
1239 resid = uio->uio_resid;
1241 resid = top->m_pkthdr.len;
1252 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1258 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
1259 (so->so_proto->pr_flags & PR_ATOMIC);
1261 td->td_ru.ru_msgsnd++;
1262 if (control != NULL)
1263 clen = control->m_len;
1271 SOCKBUF_LOCK(&so->so_snd);
1272 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1273 SOCKBUF_UNLOCK(&so->so_snd);
1278 error = so->so_error;
1280 SOCKBUF_UNLOCK(&so->so_snd);
1283 if ((so->so_state & SS_ISCONNECTED) == 0) {
1290 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
1291 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
1292 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
1293 !(resid == 0 && clen != 0)) {
1294 SOCKBUF_UNLOCK(&so->so_snd);
1298 }
else if (addr == NULL) {
1299 SOCKBUF_UNLOCK(&so->so_snd);
1300 if (so->so_proto->pr_flags & PR_CONNREQUIRED)
1303 error = EDESTADDRREQ;
1307 space = sbspace(&so->so_snd);
1308 if (flags & MSG_OOB)
1310 if ((atomic && resid > so->so_snd.sb_hiwat) ||
1311 clen > so->so_snd.sb_hiwat) {
1312 SOCKBUF_UNLOCK(&so->so_snd);
1316 if (space < resid + clen &&
1317 (atomic || space < so->so_snd.sb_lowat || space < clen)) {
1318 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) {
1319 SOCKBUF_UNLOCK(&so->so_snd);
1320 error = EWOULDBLOCK;
1323 error =
sbwait(&so->so_snd);
1324 SOCKBUF_UNLOCK(&so->so_snd);
1329 SOCKBUF_UNLOCK(&so->so_snd);
1334 if (flags & MSG_EOR)
1335 top->m_flags |= M_EOR;
1337 #ifdef ZERO_COPY_SOCKETS
1338 error = sosend_copyin(uio, &top, atomic,
1350 (atomic ? M_PKTHDR : 0) |
1351 ((flags & MSG_EOR) ? M_EOR : 0));
1356 space -= resid - uio->uio_resid;
1358 resid = uio->uio_resid;
1362 so->so_options |= SO_DONTROUTE;
1376 error = (*so->so_proto->pr_usrreqs->pru_send)(so,
1377 (flags & MSG_OOB) ? PRUS_OOB :
1383 ((flags & MSG_EOF) &&
1384 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
1388 (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
1389 top, addr, control, td);
1392 so->so_options &= ~SO_DONTROUTE;
1400 }
while (resid && space > 0);
1408 if (control != NULL)
1414 sosend(
struct socket *so,
struct sockaddr *addr,
struct uio *uio,
1415 struct mbuf *top,
struct mbuf *control,
int flags,
struct thread *td)
1419 CURVNET_SET(so->so_vnet);
1420 error = so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top,
1421 control, flags, td);
1437 struct protosw *
pr = so->so_proto;
1441 KASSERT(flags & MSG_OOB, (
"soreceive_rcvoob: (flags & MSG_OOB) == 0"));
1444 m = m_get(M_WAIT, MT_DATA);
1445 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1449 #ifdef ZERO_COPY_SOCKETS
1450 if (so_zero_copy_receive) {
1453 if ((m->m_flags & M_EXT)
1454 && (m->m_ext.ext_type == EXT_DISPOSABLE))
1459 error = uiomoveco(mtod(m,
void *),
1460 min(uio->uio_resid, m->m_len),
1464 error =
uiomove(mtod(m,
void *),
1465 (
int) min(uio->uio_resid, m->m_len), uio);
1467 }
while (uio->uio_resid && error == 0 && m);
1482 static __inline
void
1486 SOCKBUF_LOCK_ASSERT(sb);
1491 if (sb->sb_mb != NULL)
1492 sb->sb_mb->m_nextpkt = nextrecord;
1494 sb->sb_mb = nextrecord;
1502 if (sb->sb_mb == NULL) {
1503 sb->sb_mbtail = NULL;
1504 sb->sb_lastrecord = NULL;
1505 }
else if (sb->sb_mb->m_nextpkt == NULL)
1506 sb->sb_lastrecord = sb->sb_mb;
1528 struct mbuf **mp0,
struct mbuf **controlp,
int *flagsp)
1530 struct mbuf *m, **mp;
1531 int flags, error, offset;
1533 struct protosw *
pr = so->so_proto;
1534 struct mbuf *nextrecord;
1536 ssize_t orig_resid = uio->uio_resid;
1541 if (controlp != NULL)
1544 flags = *flagsp &~ MSG_EOR;
1547 if (flags & MSG_OOB)
1551 if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING)
1552 && uio->uio_resid) {
1554 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
1562 SOCKBUF_LOCK(&so->so_rcv);
1563 m = so->so_rcv.sb_mb;
1570 if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
1571 so->so_rcv.sb_cc < uio->uio_resid) &&
1572 so->so_rcv.sb_cc < so->so_rcv.sb_lowat &&
1573 m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
1574 KASSERT(m != NULL || !so->so_rcv.sb_cc,
1575 (
"receive: m == %p so->so_rcv.sb_cc == %u",
1576 m, so->so_rcv.sb_cc));
1580 error = so->so_error;
1581 if ((flags & MSG_PEEK) == 0)
1583 SOCKBUF_UNLOCK(&so->so_rcv);
1586 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1587 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
1589 SOCKBUF_UNLOCK(&so->so_rcv);
1594 for (; m != NULL; m = m->m_next)
1595 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
1596 m = so->so_rcv.sb_mb;
1599 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
1600 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
1601 SOCKBUF_UNLOCK(&so->so_rcv);
1605 if (uio->uio_resid == 0) {
1606 SOCKBUF_UNLOCK(&so->so_rcv);
1609 if ((so->so_state & SS_NBIO) ||
1610 (flags & (MSG_DONTWAIT|MSG_NBIO))) {
1611 SOCKBUF_UNLOCK(&so->so_rcv);
1612 error = EWOULDBLOCK;
1615 SBLASTRECORDCHK(&so->so_rcv);
1616 SBLASTMBUFCHK(&so->so_rcv);
1617 error =
sbwait(&so->so_rcv);
1618 SOCKBUF_UNLOCK(&so->so_rcv);
1639 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1641 uio->uio_td->td_ru.ru_msgrcv++;
1642 KASSERT(m == so->so_rcv.sb_mb, (
"soreceive: m != so->so_rcv.sb_mb"));
1643 SBLASTRECORDCHK(&so->so_rcv);
1644 SBLASTMBUFCHK(&so->so_rcv);
1645 nextrecord = m->m_nextpkt;
1646 if (pr->pr_flags & PR_ADDR) {
1647 KASSERT(m->m_type == MT_SONAME,
1648 (
"m->m_type == %d", m->m_type));
1653 if (flags & MSG_PEEK) {
1656 sbfree(&so->so_rcv, m);
1657 so->so_rcv.sb_mb = m_free(m);
1658 m = so->so_rcv.sb_mb;
1669 if (m != NULL && m->m_type == MT_CONTROL) {
1670 struct mbuf *cm = NULL, *cmn;
1671 struct mbuf **cme = &cm;
1674 if (flags & MSG_PEEK) {
1675 if (controlp != NULL) {
1676 *controlp = m_copy(m, 0, m->m_len);
1677 controlp = &(*controlp)->m_next;
1681 sbfree(&so->so_rcv, m);
1682 so->so_rcv.sb_mb = m->m_next;
1685 cme = &(*cme)->m_next;
1686 m = so->so_rcv.sb_mb;
1688 }
while (m != NULL && m->m_type == MT_CONTROL);
1689 if ((flags & MSG_PEEK) == 0)
1691 while (cm != NULL) {
1694 if (pr->pr_domain->dom_externalize != NULL) {
1695 SOCKBUF_UNLOCK(&so->so_rcv);
1697 error = (*pr->pr_domain->dom_externalize)
1699 SOCKBUF_LOCK(&so->so_rcv);
1700 }
else if (controlp != NULL)
1704 if (controlp != NULL) {
1706 while (*controlp != NULL)
1707 controlp = &(*controlp)->m_next;
1712 nextrecord = so->so_rcv.sb_mb->m_nextpkt;
1714 nextrecord = so->so_rcv.sb_mb;
1718 if ((flags & MSG_PEEK) == 0) {
1719 KASSERT(m->m_nextpkt == nextrecord,
1720 (
"soreceive: post-control, nextrecord !sync"));
1721 if (nextrecord == NULL) {
1722 KASSERT(so->so_rcv.sb_mb == m,
1723 (
"soreceive: post-control, sb_mb!=m"));
1724 KASSERT(so->so_rcv.sb_lastrecord == m,
1725 (
"soreceive: post-control, lastrecord!=m"));
1729 if (type == MT_OOBDATA)
1732 if ((flags & MSG_PEEK) == 0) {
1733 KASSERT(so->so_rcv.sb_mb == nextrecord,
1734 (
"soreceive: sb_mb != nextrecord"));
1735 if (so->so_rcv.sb_mb == NULL) {
1736 KASSERT(so->so_rcv.sb_lastrecord == NULL,
1737 (
"soreceive: sb_lastercord != NULL"));
1741 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1742 SBLASTRECORDCHK(&so->so_rcv);
1743 SBLASTMBUFCHK(&so->so_rcv);
1755 while (m != NULL && uio->uio_resid > 0 && error == 0) {
1760 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1761 if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) {
1762 if (type != m->m_type)
1764 }
else if (type == MT_OOBDATA)
1767 KASSERT(m->m_type == MT_DATA,
1768 (
"m->m_type == %d", m->m_type));
1769 so->so_rcv.sb_state &= ~SBS_RCVATMARK;
1770 len = uio->uio_resid;
1771 if (so->so_oobmark && len > so->so_oobmark - offset)
1772 len = so->so_oobmark - offset;
1773 if (len > m->m_len - moff)
1774 len = m->m_len - moff;
1783 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1784 SBLASTRECORDCHK(&so->so_rcv);
1785 SBLASTMBUFCHK(&so->so_rcv);
1786 SOCKBUF_UNLOCK(&so->so_rcv);
1787 #ifdef ZERO_COPY_SOCKETS
1788 if (so_zero_copy_receive) {
1791 if ((m->m_flags & M_EXT)
1792 && (m->m_ext.ext_type == EXT_DISPOSABLE))
1797 error = uiomoveco(mtod(m,
char *) + moff,
1802 error =
uiomove(mtod(m,
char *) + moff, (
int)len, uio);
1803 SOCKBUF_LOCK(&so->so_rcv);
1813 if (m && pr->pr_flags & PR_ATOMIC &&
1814 ((flags & MSG_PEEK) == 0))
1816 SOCKBUF_UNLOCK(&so->so_rcv);
1820 uio->uio_resid -= len;
1821 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1822 if (len == m->m_len - moff) {
1823 if (m->m_flags & M_EOR)
1825 if (flags & MSG_PEEK) {
1829 nextrecord = m->m_nextpkt;
1830 sbfree(&so->so_rcv, m);
1834 so->so_rcv.sb_mb = m = m->m_next;
1837 so->so_rcv.sb_mb = m_free(m);
1838 m = so->so_rcv.sb_mb;
1841 SBLASTRECORDCHK(&so->so_rcv);
1842 SBLASTMBUFCHK(&so->so_rcv);
1845 if (flags & MSG_PEEK)
1851 if (flags & MSG_DONTWAIT)
1852 copy_flag = M_DONTWAIT;
1855 if (copy_flag == M_WAIT)
1856 SOCKBUF_UNLOCK(&so->so_rcv);
1857 *mp =
m_copym(m, 0, len, copy_flag);
1858 if (copy_flag == M_WAIT)
1859 SOCKBUF_LOCK(&so->so_rcv);
1869 uio->uio_resid += len;
1875 so->so_rcv.sb_cc -= len;
1878 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1879 if (so->so_oobmark) {
1880 if ((flags & MSG_PEEK) == 0) {
1881 so->so_oobmark -= len;
1882 if (so->so_oobmark == 0) {
1883 so->so_rcv.sb_state |= SBS_RCVATMARK;
1888 if (offset == so->so_oobmark)
1892 if (flags & MSG_EOR)
1901 while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
1902 !sosendallatonce(so) && nextrecord == NULL) {
1903 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1904 if (so->so_error || so->so_rcv.sb_state & SBS_CANTRCVMORE)
1910 if (pr->pr_flags & PR_WANTRCVD) {
1911 SOCKBUF_UNLOCK(&so->so_rcv);
1913 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1914 SOCKBUF_LOCK(&so->so_rcv);
1916 SBLASTRECORDCHK(&so->so_rcv);
1917 SBLASTMBUFCHK(&so->so_rcv);
1922 if (so->so_rcv.sb_mb == NULL) {
1923 error =
sbwait(&so->so_rcv);
1925 SOCKBUF_UNLOCK(&so->so_rcv);
1929 m = so->so_rcv.sb_mb;
1931 nextrecord = m->m_nextpkt;
1935 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1936 if (m != NULL && pr->pr_flags & PR_ATOMIC) {
1938 if ((flags & MSG_PEEK) == 0)
1941 if ((flags & MSG_PEEK) == 0) {
1948 so->so_rcv.sb_mb = nextrecord;
1949 if (so->so_rcv.sb_mb == NULL) {
1950 so->so_rcv.sb_mbtail = NULL;
1951 so->so_rcv.sb_lastrecord = NULL;
1952 }
else if (nextrecord->m_nextpkt == NULL)
1953 so->so_rcv.sb_lastrecord = nextrecord;
1955 SBLASTRECORDCHK(&so->so_rcv);
1956 SBLASTMBUFCHK(&so->so_rcv);
1962 if (!(flags & MSG_SOCALLBCK) &&
1963 (pr->pr_flags & PR_WANTRCVD)) {
1964 SOCKBUF_UNLOCK(&so->so_rcv);
1966 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
1967 SOCKBUF_LOCK(&so->so_rcv);
1970 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1971 if (orig_resid == uio->uio_resid && orig_resid &&
1972 (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) {
1973 SOCKBUF_UNLOCK(&so->so_rcv);
1976 SOCKBUF_UNLOCK(&so->so_rcv);
1991 struct mbuf **mp0,
struct mbuf **controlp,
int *flagsp)
1993 int len = 0, error = 0, flags, oresid;
1995 struct mbuf *m, *n = NULL;
1998 if (so->so_type != SOCK_STREAM)
2002 if (controlp != NULL)
2005 flags = *flagsp &~ MSG_EOR;
2008 if (flags & MSG_OOB)
2022 if (uio->uio_resid == 0) {
2026 oresid = uio->uio_resid;
2029 if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
2035 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
2041 if (oresid > uio->uio_resid)
2043 error = so->so_error;
2044 if (!(flags & MSG_PEEK))
2050 if (sb->sb_state & SBS_CANTRCVMORE) {
2058 if (sb->sb_cc == 0 &&
2059 ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
2065 if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) &&
2066 ((so->so_state & SS_NBIO) ||
2067 (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
2068 sb->sb_cc >= sb->sb_lowat ||
2069 sb->sb_cc >= uio->uio_resid ||
2070 sb->sb_cc >= sb->sb_hiwat) ) {
2075 if ((flags & MSG_WAITALL) &&
2076 (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_hiwat))
2089 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
2090 KASSERT(sb->sb_cc > 0, (
"%s: sockbuf empty", __func__));
2091 KASSERT(sb->sb_mb != NULL, (
"%s: sb_mb == NULL", __func__));
2095 uio->uio_td->td_ru.ru_msgrcv++;
2098 len = min(uio->uio_resid, sb->sb_cc);
2101 if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
2105 m_cat(*mp0, sb->sb_mb);
2107 m != NULL && m->m_len <= len;
2110 uio->uio_resid -= m->m_len;
2116 sb->sb_lastrecord = sb->sb_mb;
2117 if (sb->sb_mb == NULL)
2122 KASSERT(sb->sb_mb != NULL,
2123 (
"%s: len > 0 && sb->sb_mb empty", __func__));
2125 m =
m_copym(sb->sb_mb, 0, len, M_DONTWAIT);
2129 uio->uio_resid -= len;
2147 SBLASTRECORDCHK(sb);
2154 if (!(flags & MSG_PEEK)) {
2159 if ((so->so_proto->pr_flags & PR_WANTRCVD) &&
2160 (((flags & MSG_WAITALL) && uio->uio_resid > 0) ||
2161 !(flags & MSG_SOCALLBCK))) {
2164 (*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags);
2173 if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
2176 SOCKBUF_LOCK_ASSERT(sb);
2177 SBLASTRECORDCHK(sb);
2192 struct mbuf **mp0,
struct mbuf **controlp,
int *flagsp)
2194 struct mbuf *m, *m2;
2197 struct protosw *
pr = so->so_proto;
2198 struct mbuf *nextrecord;
2202 if (controlp != NULL)
2205 flags = *flagsp &~ MSG_EOR;
2213 if (mp0 != NULL || (flags & MSG_PEEK) || (flags & MSG_OOB))
2220 KASSERT((pr->pr_flags & PR_WANTRCVD) == 0,
2221 (
"soreceive_dgram: wantrcvd"));
2222 KASSERT(pr->pr_flags & PR_ATOMIC, (
"soreceive_dgram: !atomic"));
2223 KASSERT((so->so_rcv.sb_state & SBS_RCVATMARK) == 0,
2224 (
"soreceive_dgram: SBS_RCVATMARK"));
2225 KASSERT((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0,
2226 (
"soreceive_dgram: P_CONNREQUIRED"));
2231 SOCKBUF_LOCK(&so->so_rcv);
2232 while ((m = so->so_rcv.sb_mb) == NULL) {
2233 KASSERT(so->so_rcv.sb_cc == 0,
2234 (
"soreceive_dgram: sb_mb NULL but sb_cc %u",
2237 error = so->so_error;
2239 SOCKBUF_UNLOCK(&so->so_rcv);
2242 if (so->so_rcv.sb_state & SBS_CANTRCVMORE ||
2243 uio->uio_resid == 0) {
2244 SOCKBUF_UNLOCK(&so->so_rcv);
2247 if ((so->so_state & SS_NBIO) ||
2248 (flags & (MSG_DONTWAIT|MSG_NBIO))) {
2249 SOCKBUF_UNLOCK(&so->so_rcv);
2250 return (EWOULDBLOCK);
2252 SBLASTRECORDCHK(&so->so_rcv);
2253 SBLASTMBUFCHK(&so->so_rcv);
2254 error =
sbwait(&so->so_rcv);
2256 SOCKBUF_UNLOCK(&so->so_rcv);
2260 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
2263 uio->uio_td->td_ru.ru_msgrcv++;
2264 SBLASTRECORDCHK(&so->so_rcv);
2265 SBLASTMBUFCHK(&so->so_rcv);
2266 nextrecord = m->m_nextpkt;
2267 if (nextrecord == NULL) {
2268 KASSERT(so->so_rcv.sb_lastrecord == m,
2269 (
"soreceive_dgram: lastrecord != m"));
2272 KASSERT(so->so_rcv.sb_mb->m_nextpkt == nextrecord,
2273 (
"soreceive_dgram: m_nextpkt != nextrecord"));
2278 so->so_rcv.sb_mb = NULL;
2284 for (m2 = m; m2 != NULL; m2 = m2->m_next)
2285 sbfree(&so->so_rcv, m2);
2290 SBLASTRECORDCHK(&so->so_rcv);
2291 SBLASTMBUFCHK(&so->so_rcv);
2292 SOCKBUF_UNLOCK(&so->so_rcv);
2294 if (pr->pr_flags & PR_ADDR) {
2295 KASSERT(m->m_type == MT_SONAME,
2296 (
"m->m_type == %d", m->m_type));
2317 if (m->m_type == MT_CONTROL) {
2318 struct mbuf *cm = NULL, *cmn;
2319 struct mbuf **cme = &cm;
2325 cme = &(*cme)->m_next;
2327 }
while (m != NULL && m->m_type == MT_CONTROL);
2328 while (cm != NULL) {
2331 if (pr->pr_domain->dom_externalize != NULL) {
2332 error = (*pr->pr_domain->dom_externalize)
2334 }
else if (controlp != NULL)
2338 if (controlp != NULL) {
2339 while (*controlp != NULL)
2340 controlp = &(*controlp)->m_next;
2345 KASSERT(m == NULL || m->m_type == MT_DATA,
2346 (
"soreceive_dgram: !data"));
2347 while (m != NULL && uio->uio_resid > 0) {
2348 len = uio->uio_resid;
2351 error =
uiomove(mtod(m,
char *), (
int)len, uio);
2356 if (len == m->m_len)
2373 soreceive(
struct socket *so,
struct sockaddr **psa,
struct uio *uio,
2374 struct mbuf **mp0,
struct mbuf **controlp,
int *flagsp)
2378 CURVNET_SET(so->so_vnet);
2379 error = (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0,
2388 struct protosw *
pr = so->so_proto;
2391 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
2394 CURVNET_SET(so->so_vnet);
2395 if (pr->pr_usrreqs->pru_flush != NULL) {
2396 (*pr->pr_usrreqs->pru_flush)(so, how);
2400 if (how != SHUT_RD) {
2401 error = (*pr->pr_usrreqs->pru_shutdown)(so);
2414 struct sockbuf *sb = &so->so_rcv;
2415 struct protosw *
pr = so->so_proto;
2434 (void)
sblock(sb, SBL_WAIT | SBL_NOINTR);
2441 bzero(&asb, offsetof(
struct sockbuf, sb_startzero));
2442 bcopy(&sb->sb_startzero, &asb.sb_startzero,
2443 sizeof(*sb) - offsetof(
struct sockbuf, sb_startzero));
2444 bzero(&sb->sb_startzero,
2445 sizeof(*sb) - offsetof(
struct sockbuf, sb_startzero));
2453 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
2454 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
2476 if ((valsize = sopt->sopt_valsize) < minlen)
2479 sopt->sopt_valsize = valsize = len;
2481 if (sopt->sopt_td != NULL)
2482 return (copyin(sopt->sopt_val, buf, valsize));
2484 bcopy(sopt->sopt_val, buf, valsize);
2497 struct sockopt sopt;
2499 sopt.sopt_level =
level;
2500 sopt.sopt_name = optname;
2501 sopt.sopt_dir = SOPT_SET;
2502 sopt.sopt_val = optval;
2503 sopt.sopt_valsize = optlen;
2504 sopt.sopt_td = NULL;
2520 CURVNET_SET(so->so_vnet);
2522 if (sopt->sopt_level != SOL_SOCKET) {
2523 if (so->so_proto->pr_ctloutput != NULL) {
2524 error = (*so->so_proto->pr_ctloutput)(so, sopt);
2528 error = ENOPROTOOPT;
2530 switch (sopt->sopt_name) {
2532 case SO_ACCEPTFILTER:
2539 error =
sooptcopyin(sopt, &l,
sizeof l,
sizeof l);
2544 so->so_linger = l.l_linger;
2546 so->so_options |= SO_LINGER;
2548 so->so_options &= ~SO_LINGER;
2555 case SO_USELOOPBACK:
2571 so->so_options |= sopt->sopt_name;
2573 so->so_options &= ~sopt->sopt_name;
2583 if (optval < 0 || optval >= rt_numfibs) {
2587 if (((so->so_proto->pr_domain->dom_family == PF_INET) ||
2588 (so->so_proto->pr_domain->dom_family == PF_INET6) ||
2589 (so->so_proto->pr_domain->dom_family == PF_ROUTE)))
2590 so->so_fibnum = optval;
2595 case SO_USER_COOKIE:
2600 so->so_user_cookie = val32;
2621 switch (sopt->sopt_name) {
2624 if (
sbreserve(sopt->sopt_name == SO_SNDBUF ?
2625 &so->so_snd : &so->so_rcv, (u_long)optval,
2626 so, curthread) == 0) {
2630 (sopt->sopt_name == SO_SNDBUF ? &so->so_snd :
2631 &so->so_rcv)->sb_flags &= ~SB_AUTOSIZE;
2639 SOCKBUF_LOCK(&so->so_snd);
2640 so->so_snd.sb_lowat =
2641 (optval > so->so_snd.sb_hiwat) ?
2642 so->so_snd.sb_hiwat : optval;
2643 SOCKBUF_UNLOCK(&so->so_snd);
2646 SOCKBUF_LOCK(&so->so_rcv);
2647 so->so_rcv.sb_lowat =
2648 (optval > so->so_rcv.sb_hiwat) ?
2649 so->so_rcv.sb_hiwat : optval;
2650 SOCKBUF_UNLOCK(&so->so_rcv);
2658 if (SV_CURPROC_FLAG(SV_ILP32)) {
2659 struct timeval32 tv32;
2663 CP(tv32, tv, tv_sec);
2664 CP(tv32, tv, tv_usec);
2671 if (tv.tv_sec < 0 || tv.tv_usec < 0 ||
2672 tv.tv_usec >= 1000000) {
2678 switch (sopt->sopt_name) {
2680 so->so_snd.sb_timeo = val;
2683 so->so_rcv.sb_timeo = val;
2694 error = mac_setsockopt_label(sopt->sopt_td->td_ucred,
2702 error = ENOPROTOOPT;
2705 if (error == 0 && so->so_proto->pr_ctloutput != NULL)
2706 (void)(*so->so_proto->pr_ctloutput)(so, sopt);
2732 valsize = min(len, sopt->sopt_valsize);
2733 sopt->sopt_valsize = valsize;
2734 if (sopt->sopt_val != NULL) {
2735 if (sopt->sopt_td != NULL)
2736 error = copyout(buf, sopt->sopt_val, valsize);
2738 bcopy(buf, sopt->sopt_val, valsize);
2753 CURVNET_SET(so->so_vnet);
2755 if (sopt->sopt_level != SOL_SOCKET) {
2756 if (so->so_proto->pr_ctloutput != NULL)
2757 error = (*so->so_proto->pr_ctloutput)(so, sopt);
2759 error = ENOPROTOOPT;
2763 switch (sopt->sopt_name) {
2765 case SO_ACCEPTFILTER:
2771 l.l_onoff = so->so_options & SO_LINGER;
2772 l.l_linger = so->so_linger;
2777 case SO_USELOOPBACK:
2789 optval = so->so_options & sopt->sopt_name;
2795 optval = so->so_type;
2799 optval = so->so_proto->pr_protocol;
2804 optval = so->so_error;
2810 optval = so->so_snd.sb_hiwat;
2814 optval = so->so_rcv.sb_hiwat;
2818 optval = so->so_snd.sb_lowat;
2822 optval = so->so_rcv.sb_lowat;
2827 optval = (sopt->sopt_name == SO_SNDTIMEO ?
2828 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
2830 tv.tv_sec = optval /
hz;
2831 tv.tv_usec = (optval %
hz) *
tick;
2833 if (SV_CURPROC_FLAG(SV_ILP32)) {
2834 struct timeval32 tv32;
2836 CP(tv, tv32, tv_sec);
2837 CP(tv, tv32, tv_usec);
2846 error =
sooptcopyin(sopt, &extmac,
sizeof(extmac),
2850 error = mac_getsockopt_label(sopt->sopt_td->td_ucred,
2862 error =
sooptcopyin(sopt, &extmac,
sizeof(extmac),
2866 error = mac_getsockopt_peerlabel(
2867 sopt->sopt_td->td_ucred, so, &extmac);
2876 case SO_LISTENQLIMIT:
2877 optval = so->so_qlimit;
2881 optval = so->so_qlen;
2884 case SO_LISTENINCQLEN:
2885 optval = so->so_incqlen;
2889 error = ENOPROTOOPT;
2904 struct mbuf *m, *m_prev;
2905 int sopt_size = sopt->sopt_valsize;
2907 MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
2910 if (sopt_size > MLEN) {
2911 MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
2912 if ((m->m_flags & M_EXT) == 0) {
2916 m->m_len = min(MCLBYTES, sopt_size);
2918 m->m_len = min(MLEN, sopt_size);
2920 sopt_size -= m->m_len;
2925 MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
2930 if (sopt_size > MLEN) {
2931 MCLGET(m, sopt->sopt_td != NULL ? M_WAIT :
2933 if ((m->m_flags & M_EXT) == 0) {
2938 m->m_len = min(MCLBYTES, sopt_size);
2940 m->m_len = min(MLEN, sopt_size);
2942 sopt_size -= m->m_len;
2953 struct mbuf *m0 = m;
2955 if (sopt->sopt_val == NULL)
2957 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2958 if (sopt->sopt_td != NULL) {
2961 error = copyin(sopt->sopt_val, mtod(m,
char *),
2968 bcopy(sopt->sopt_val, mtod(m,
char *), m->m_len);
2969 sopt->sopt_valsize -= m->m_len;
2970 sopt->sopt_val = (
char *)sopt->sopt_val + m->m_len;
2974 panic(
"ip6_sooptmcopyin");
2982 struct mbuf *m0 = m;
2985 if (sopt->sopt_val == NULL)
2987 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
2988 if (sopt->sopt_td != NULL) {
2991 error = copyout(mtod(m,
char *), sopt->sopt_val,
2998 bcopy(mtod(m,
char *), sopt->sopt_val, m->m_len);
2999 sopt->sopt_valsize -= m->m_len;
3000 sopt->sopt_val = (
char *)sopt->sopt_val + m->m_len;
3001 valsize += m->m_len;
3009 sopt->sopt_valsize = valsize;
3021 if (so->so_sigio != NULL)
3022 pgsigio(&so->so_sigio, SIGURG, 0);
3027 sopoll(
struct socket *so,
int events,
struct ucred *active_cred,
3035 return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred,
3045 SOCKBUF_LOCK(&so->so_snd);
3046 SOCKBUF_LOCK(&so->so_rcv);
3047 if (events & (POLLIN | POLLRDNORM))
3048 if (soreadabledata(so))
3049 revents |= events & (POLLIN | POLLRDNORM);
3051 if (events & (POLLOUT | POLLWRNORM))
3052 if (sowriteable(so))
3053 revents |= events & (POLLOUT | POLLWRNORM);
3055 if (events & (POLLPRI | POLLRDBAND))
3056 if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK))
3057 revents |= events & (POLLPRI | POLLRDBAND);
3059 if ((events & POLLINIGNEOF) == 0) {
3060 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
3061 revents |= events & (POLLIN | POLLRDNORM);
3062 if (so->so_snd.sb_state & SBS_CANTSENDMORE)
3068 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
3070 so->so_rcv.sb_flags |= SB_SEL;
3073 if (events & (POLLOUT | POLLWRNORM)) {
3075 so->so_snd.sb_flags |= SB_SEL;
3079 SOCKBUF_UNLOCK(&so->so_rcv);
3080 SOCKBUF_UNLOCK(&so->so_snd);
3087 struct socket *so = kn->kn_fp->f_data;
3090 switch (kn->kn_filter) {
3092 if (so->so_options & SO_ACCEPTCONN)
3108 sb->sb_flags |= SB_KNOTE;
3154 struct ifnet *ifp,
struct thread *td)
3197 struct sockaddr *addr,
struct mbuf *control,
struct thread *td)
3211 sb->st_blksize = so->so_snd.sb_hiwat;
3231 struct mbuf *top,
struct mbuf *control,
int flags,
struct thread *td)
3239 struct uio *uio,
struct mbuf **mp0,
struct mbuf **controlp,
int *flagsp)
3256 struct socket *so = kn->kn_fp->f_data;
3258 SOCKBUF_LOCK(&so->so_rcv);
3261 so->so_rcv.sb_flags &= ~SB_KNOTE;
3262 SOCKBUF_UNLOCK(&so->so_rcv);
3271 so = kn->kn_fp->f_data;
3272 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
3274 kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
3275 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
3276 kn->kn_flags |= EV_EOF;
3277 kn->kn_fflags = so->so_error;
3279 }
else if (so->so_error)
3281 else if (kn->kn_sfflags & NOTE_LOWAT)
3282 return (kn->kn_data >= kn->kn_sdata);
3284 return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat);
3290 struct socket *so = kn->kn_fp->f_data;
3292 SOCKBUF_LOCK(&so->so_snd);
3295 so->so_snd.sb_flags &= ~SB_KNOTE;
3296 SOCKBUF_UNLOCK(&so->so_snd);
3305 so = kn->kn_fp->f_data;
3306 SOCKBUF_LOCK_ASSERT(&so->so_snd);
3307 kn->kn_data = sbspace(&so->so_snd);
3308 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
3309 kn->kn_flags |= EV_EOF;
3310 kn->kn_fflags = so->so_error;
3312 }
else if (so->so_error)
3314 else if (((so->so_state & SS_ISCONNECTED) == 0) &&
3315 (so->so_proto->pr_flags & PR_CONNREQUIRED))
3317 else if (kn->kn_sfflags & NOTE_LOWAT)
3318 return (kn->kn_data >= kn->kn_sdata);
3320 return (kn->kn_data >= so->so_snd.sb_lowat);
3327 struct socket *so = kn->kn_fp->f_data;
3329 kn->kn_data = so->so_qlen;
3330 return (! TAILQ_EMPTY(&so->so_comp));
3339 if (so->so_cred->cr_uid != uid)
3381 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
3382 so->so_state |= SS_ISCONNECTING;
3389 struct socket *head;
3395 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
3396 so->so_state |= SS_ISCONNECTED;
3398 if (head != NULL && (so->so_qstate & SQ_INCOMP)) {
3399 if ((so->so_options & SO_ACCEPTFILTER) == 0) {
3401 TAILQ_REMOVE(&head->so_incomp, so, so_list);
3403 so->so_qstate &= ~SQ_INCOMP;
3404 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
3406 so->so_qstate |= SQ_COMP;
3413 head->so_accf->so_accept_filter->accf_callback,
3414 head->so_accf->so_accept_filter_arg);
3415 so->so_options &= ~SO_ACCEPTFILTER;
3416 ret = head->so_accf->so_accept_filter->accf_callback(so,
3417 head->so_accf->so_accept_filter_arg, M_DONTWAIT);
3418 if (ret == SU_ISCONNECTED)
3421 if (ret == SU_ISCONNECTED)
3441 SOCKBUF_LOCK(&so->so_rcv);
3442 so->so_state &= ~SS_ISCONNECTING;
3443 so->so_state |= SS_ISDISCONNECTING;
3444 so->so_rcv.sb_state |= SBS_CANTRCVMORE;
3445 sorwakeup_locked(so);
3446 SOCKBUF_LOCK(&so->so_snd);
3447 so->so_snd.sb_state |= SBS_CANTSENDMORE;
3448 sowwakeup_locked(so);
3460 SOCKBUF_LOCK(&so->so_rcv);
3461 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
3462 so->so_state |= SS_ISDISCONNECTED;
3463 so->so_rcv.sb_state |= SBS_CANTRCVMORE;
3464 sorwakeup_locked(so);
3465 SOCKBUF_LOCK(&so->so_snd);
3466 so->so_snd.sb_state |= SBS_CANTSENDMORE;
3468 sowwakeup_locked(so);
3478 struct sockaddr *sa2;
3480 sa2 =
malloc(sa->sa_len, M_SONAME, mflags);
3482 bcopy(sa, sa2, sa->sa_len);
3491 int (*func)(
struct socket *,
void *,
int),
void *arg)
3503 panic(
"soupcall_set: bad which");
3505 SOCKBUF_LOCK_ASSERT(sb);
3508 KASSERT(sb->sb_upcall == NULL, (
"soupcall_set: overwriting upcall"));
3510 sb->sb_upcall = func;
3511 sb->sb_upcallarg = arg;
3512 sb->sb_flags |= SB_UPCALL;
3528 panic(
"soupcall_clear: bad which");
3530 SOCKBUF_LOCK_ASSERT(sb);
3531 KASSERT(sb->sb_upcall != NULL, (
"soupcall_clear: no upcall to clear"));
3532 sb->sb_upcall = NULL;
3533 sb->sb_upcallarg = NULL;
3534 sb->sb_flags &= ~SB_UPCALL;
3549 xso->xso_len =
sizeof *xso;
3551 xso->so_type = so->so_type;
3552 xso->so_options = so->so_options;
3553 xso->so_linger = so->so_linger;
3554 xso->so_state = so->so_state;
3555 xso->so_pcb = so->so_pcb;
3556 xso->xso_protocol = so->so_proto->pr_protocol;
3557 xso->xso_family = so->so_proto->pr_domain->dom_family;
3558 xso->so_qlen = so->so_qlen;
3559 xso->so_incqlen = so->so_incqlen;
3560 xso->so_qlimit = so->so_qlimit;
3561 xso->so_timeo = so->so_timeo;
3562 xso->so_error = so->so_error;
3563 xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
3564 xso->so_oobmark = so->so_oobmark;
3567 xso->so_uid = so->so_cred->cr_uid;
3581 TAILQ_FOREACH(so, &so->so_comp, so_list)
3589 return (&so->so_rcv);
3596 return (&so->so_snd);
3603 return (so->so_state);
3617 return (so->so_options);
3624 so->so_options = val;
3631 return (so->so_error);
3645 return (so->so_linger);
3652 so->so_linger = val;
3659 return (so->so_proto);
3687 sorwakeup_locked(so);
3694 sowwakeup_locked(so);
int sbreserve(struct sockbuf *sb, u_long cc, struct socket *so, struct thread *td)
static void sodealloc(struct socket *so)
void soisconnected(struct socket *so)
int tvtohz(struct timeval *tv)
void so_unlock(struct socket *so)
int pru_rcvd_notsupp(struct socket *so, int flags)
int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
static int filt_sowrite(struct knote *kn, long hint)
int chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t max)
void soupcall_clear(struct socket *so, int which)
int soo_kqfilter(struct file *fp, struct knote *kn)
int prison_check_af(struct ucred *cred, int af)
void so_error_set(struct socket *so, int val)
int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len)
int soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
void soisdisconnecting(struct socket *so)
void selrecord(struct thread *selector, struct selinfo *sip)
void * malloc(unsigned long size, struct malloc_type *mtp, int flags)
SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL)
static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS)
void sbunlock(struct sockbuf *sb)
struct sockaddr * sodupsockaddr(const struct sockaddr *sa, int mflags)
static struct filterops soread_filtops
int sbwait(struct sockbuf *sb)
static struct filterops solisten_filtops
void so_protosw_set(struct socket *so, struct protosw *val)
static struct filterops sowrite_filtops
void sohasoutofband(struct socket *so)
int soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
void panic(const char *fmt,...)
int pru_sense_null(struct socket *so, struct stat *sb)
int pru_listen_notsupp(struct socket *so, int backlog, struct thread *td)
int pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
static void socket_init(void *tag)
struct mbuf * m_uiotombuf(struct uio *uio, int how, int len, int align, int flags)
void knote(struct knlist *list, long hint, int lockflags)
static void init_maxsockets(void *ignored)
int knlist_empty(struct knlist *knl)
void selwakeuppri(struct selinfo *sip, int pri)
int pru_disconnect_notsupp(struct socket *so)
int soaccept(struct socket *so, struct sockaddr **nam)
static struct socket * soalloc(struct vnet *vnet)
int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
void solisten_proto(struct socket *so, int backlog)
void socantrcvmore(struct socket *so)
void m_freem(struct mbuf *mb)
int solisten(struct socket *so, int backlog, struct thread *td)
int sosetopt(struct socket *so, struct sockopt *sopt)
void knlist_destroy(struct knlist *knl)
void sbrelease_internal(struct sockbuf *sb, struct socket *so)
struct mbuf * m_copym(struct mbuf *m, int off0, int len, int wait)
MTX_SYSINIT(accept_mtx,&accept_mtx,"accept", MTX_DEF)
int sodisconnect(struct socket *so)
int sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
void funsetown(struct sigio **sigiop)
void wakeup_one(void *ident)
void sofree(struct socket *so)
void sbdrop_locked(struct sockbuf *sb, int len)
int pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
struct socket * sonewconn(struct socket *head, int connstatus)
int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
int so_setsockopt(struct socket *so, int level, int optname, void *optval, size_t optlen)
int ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
int sblock(struct sockbuf *sb, int flags)
int soopt_getm(struct sockopt *sopt, struct mbuf **mp)
void so_sorwakeup(struct socket *so)
int so_state_get(const struct socket *so)
void so_lock(struct socket *so)
int soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
void knlist_remove(struct knlist *knl, struct knote *kn, int islocked)
int sopoll(struct socket *so, int events, struct ucred *active_cred, struct thread *td)
void soupcall_set(struct socket *so, int which, int(*func)(struct socket *, void *, int), void *arg)
int soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
MALLOC_DEFINE(M_SONAME,"soname","socket name")
void sorflush(struct socket *so)
static int sysctl_maxsockets(SYSCTL_HANDLER_ARGS)
void soabort(struct socket *so)
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0,"IPC")
struct protosw * pffindproto(int family, int protocol, int type)
static int numopensockets
void seldrain(struct selinfo *sip)
int m_mbuftouio(struct uio *uio, struct mbuf *m, int len)
void soisconnecting(struct socket *so)
void crfree(struct ucred *cr)
static int filt_solisten(struct knote *kn, long hint)
SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,&numopensockets, 0,"Number of open sockets")
int pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
int pru_connect2_notsupp(struct socket *so1, struct socket *so2)
int so_linger_get(const struct socket *so)
int soconnect2(struct socket *so1, struct socket *so2)
int sysctl_handle_int(SYSCTL_HANDLER_ARGS)
static void filt_sordetach(struct knote *kn)
void sbdestroy(struct sockbuf *sb, struct socket *so)
void knlist_add(struct knlist *knl, struct knote *kn, int islocked)
void log(int level, const char *fmt,...)
void so_options_set(struct socket *so, int val)
int pru_attach_notsupp(struct socket *so, int proto, struct thread *td)
int pru_send_notsupp(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td)
static void socket_zone_change(void *tag)
static void filt_sowdetach(struct knote *kn)
int do_getopt_accept_filter(struct socket *so, struct sockopt *sopt)
struct ucred * crhold(struct ucred *cr)
void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
static int filt_soread(struct knote *kn, long hint)
int uiomove(void *cp, int n, struct uio *uio)
int soshutdown(struct socket *so, int how)
struct protosw * so_protosw_get(const struct socket *so)
int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
struct protosw * pffindtype(int family, int type)
int sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
void so_state_set(struct socket *so, int val)
int do_setopt_accept_filter(struct socket *so, struct sockopt *sopt)
#define VNET_SO_ASSERT(so)
int pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred, struct thread *td)
void m_cat(struct mbuf *m, struct mbuf *n)
static struct mtx so_global_mtx
int sogetopt(struct socket *so, struct sockopt *sopt)
int pru_shutdown_notsupp(struct socket *so)
int soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
struct sockbuf * so_sockbuf_snd(struct socket *so)
void so_sorwakeup_locked(struct socket *so)
void so_sowwakeup_locked(struct socket *so)
int sopoll_generic(struct socket *so, int events, struct ucred *active_cred, struct thread *td)
int pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
int soclose(struct socket *so)
int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td)
int pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
void sbdroprecord_locked(struct sockbuf *sb)
int pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
static __inline void sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
int so_error_get(const struct socket *so)
int socheckuid(struct socket *so, uid_t uid)
void soisdisconnected(struct socket *so)
int socreate(int dom, struct socket **aso, int type, int proto, struct ucred *cred, struct thread *td)
struct sockbuf * so_sockbuf_rcv(struct socket *so)
void so_sowwakeup(struct socket *so)
int so_options_get(const struct socket *so)
int solisten_proto_check(struct socket *so)
int sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
void sotoxsocket(struct socket *so, struct xsocket *xso)
void so_listeners_apply_all(struct socket *so, void(*func)(struct socket *, void *), void *arg)
static struct pollrec pr[POLL_LIST_LEN]
int socow_setup(struct mbuf *m0, struct uio *uio)
void knlist_init_mtx(struct knlist *knl, struct mtx *lock)
void so_linger_set(struct socket *so, int val)
SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT|CTLFLAG_RW, 0, sizeof(int), sysctl_somaxconn,"I","Maximum listen socket pending connection accept queue size")
int soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
static int soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
void pgsigio(struct sigio **sigiop, int sig, int checkctty)
int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
const struct cf_level * level
int soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
void sx_destroy(struct sx *sx)