Fixed hang on race condition with UDP-RECV and fork

This commit is contained in:
Gerhard Rieger 2020-11-28 10:21:39 +01:00
parent 55518fa690
commit 5570bf4d62
7 changed files with 107 additions and 8 deletions

View file

@ -27,6 +27,13 @@ Corrections:
not writing complete pages.
Test: O_DIRECT
There was a race condition in the way Socat UDP-RECVFROM and similar
addresses with option fork prevent one packet from triggering
multiple processes. The symptom was that the Socat master process seemed
to hang and did not process further packets. The fix makes use of the
pselect() system call.
Thanks to Fulvio Scapin for reporting this issue.
Porting:
In gcc version 10 the default changed from -fcommon to -fno-common.
Consequently, linking filan and procan failed with error

View file

@ -626,6 +626,33 @@ typedef int sig_atomic_t;
# endif
#endif
/* F_tv_nsec: printf conversion for the tv_nsec member of struct timespec,
   zero padded to nine digits.
   HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC is a numeric type code; when it is
   undefined or 0 the code 5 ("%09ld") is assumed.
   default: long */
#if !defined(HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC) || !HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC
# undef HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC
# define HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC 5
#endif
/* a definition of F_tv_nsec made before this point takes precedence */
#ifndef F_tv_nsec
# if HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC==1
#define F_tv_nsec "%09hd"
# elif HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC==2
#define F_tv_nsec "%09hu"
# elif HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC==3
#define F_tv_nsec "%09d"
# elif HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC==4
#define F_tv_nsec "%09u"
# elif HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC==5
#define F_tv_nsec "%09ld"
# elif HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC==6
#define F_tv_nsec "%09lu"
/* NOTE(review): codes 7 and 8 presumably stand for (unsigned) long long;
   ISO C defines the L length modifier only for floating point conversions,
   so "%09Ld"/"%09Lu" depend on a libc extension - confirm, or use "ll" */
# elif HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC==7
#define F_tv_nsec "%09Ld"
# elif HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC==8
#define F_tv_nsec "%09Lu"
# else
/* any code outside 1..8 stops the build */
#error "HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC is out of range:" HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC
# endif
#endif
/* default: long */
#if !defined(HAVE_TYPEOF_RLIM_MAX) || !HAVE_TYPEOF_RLIM_MAX
# undef HAVE_TYPEOF_RLIM_MAX
@ -653,6 +680,10 @@ typedef int sig_atomic_t;
# endif
#endif
/* sigset_t printing - not an exact solution yet: a signal set is shown as
   one unsigned long (T_sigset) in hex, so at most the first
   sizeof(unsigned long) bytes of the set become visible.
   NOTE(review): "%4lx" pads with blanks, giving e.g. "0x  ff"; confirm that
   zero padding ("%04lx") was not intended */
#define F_sigset "0x%4lx"
typedef unsigned long T_sigset;
/* default: socklen_t */
#if !defined(HAVE_TYPEOF_STRUCT_CMSGHDR_CMSG_LEN) || !HAVE_TYPEOF_STRUCT_CMSGHDR_CMSG_LEN
# undef HAVE_TYPEOF_STRUCT_CMSGHDR_CMSG_LEN

View file

@ -63,6 +63,9 @@
/* Define if you have the select function. */
#undef HAVE_SELECT
/* Define if you have the pselect function. */
#undef HAVE_PSELECT
/* Define if you have the poll function. */
#undef HAVE_POLL
@ -599,6 +602,7 @@
#undef HAVE_TYPEOF_ST64_BLOCKS
#undef HAVE_TYPEOF_STRUCT_TIMEVAL_TV_USEC
#undef HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC
#undef HAVE_TYPEOF_RLIM_MAX

View file

@ -769,7 +769,7 @@ AC_PROG_GCC_TRADITIONAL
AC_FUNC_MEMCMP
AC_TYPE_SIGNAL
AC_FUNC_STRFTIME
AC_CHECK_FUNCS(putenv select poll socket strtod strtol)
AC_CHECK_FUNCS(putenv select pselect poll socket strtod strtol)
AC_CHECK_FUNCS(strtoul uname getpgid getsid gethostbyname getaddrinfo)
AC_CHECK_FUNCS(setgroups inet_aton)
AC_CHECK_FUNCS()
@ -1851,6 +1851,8 @@ fi
AC_TYPEOF_COMPONENT([#include <sys/time.h>], struct timeval, tv_usec, HAVE_TYPEOF_STRUCT_TIMEVAL_TV_USEC, sc_cv_type_struct_timeval_tv_usec)
AC_TYPEOF_COMPONENT([#include <sys/time.h>], struct timespec, tv_nsec, HAVE_TYPEOF_STRUCT_TIMESPEC_TV_NSEC, sc_cv_type_struct_timespec_tv_nsec)
AC_TYPEOF_COMPONENT([#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>],

44
sycls.c
View file

@ -858,6 +858,50 @@ int Select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
return result;
}
#if HAVE_PSELECT
/* Wrapper around pselect() that flushes pending diagnostics before and after
   the call and, when built WITH_SYCLS, logs the call and its outcome at debug
   level.
   n, readfds, writefds, exceptfds, timeout, sigmask are handed to pselect()
   unchanged; each of the pointers may be NULL (a NULL fd_set or signal mask
   is logged as 0, a NULL timeout as "NULL/0.000000000").
   Returns whatever pselect() returned; errno is restored to the value
   pselect() left behind, so the tracing in between cannot disturb it.
   We only show the first word of the fd_set's and of the signal mask; hope
   this is enough for most cases. */
int Pselect(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
            const struct timespec *timeout, const sigset_t *sigmask) {
   int result, _errno;
   if (!diag_in_handler) diag_flush();
#if WITH_SYCLS
   /* both variants log the same eight values and differ only in the name of
      the fd_set member; the digit in DebugN has to match the number of
      values passed */
#if HAVE_FDS_BITS
   Debug8("pselect(%d, &0x%lx, &0x%lx, &0x%lx, %s%lu." F_tv_nsec ", " F_sigset ")",
          n, readfds?readfds->fds_bits[0]:0, writefds?writefds->fds_bits[0]:0,
          exceptfds?exceptfds->fds_bits[0]:0,
          timeout?"&":"NULL/", timeout?timeout->tv_sec:0,
          timeout?timeout->tv_nsec:0,
          /* pselect() accepts a NULL mask, so it must not be dereferenced */
          sigmask?*(const T_sigset *)sigmask:(T_sigset)0);
#else
   Debug8("pselect(%d, &0x%lx, &0x%lx, &0x%lx, %s%lu." F_tv_nsec ", " F_sigset ")",
          n, readfds?readfds->__fds_bits[0]:0, writefds?writefds->__fds_bits[0]:0,
          exceptfds?exceptfds->__fds_bits[0]:0,
          timeout?"&":"NULL/", timeout?timeout->tv_sec:0,
          timeout?timeout->tv_nsec:0,
          sigmask?*(const T_sigset *)sigmask:(T_sigset)0);
#endif
#endif /* WITH_SYCLS */
   result = pselect(n, readfds, writefds, exceptfds, timeout, sigmask);
   _errno = errno;   /* keep pselect()'s errno safe from the calls below */
   if (!diag_in_handler) diag_flush();
#if WITH_SYCLS
#if HAVE_FDS_BITS
   Debug5("pselect -> (, 0x%lx, 0x%lx, 0x%lx), " F_sigset ", %d",
          readfds?readfds->fds_bits[0]:0, writefds?writefds->fds_bits[0]:0,
          exceptfds?exceptfds->fds_bits[0]:0,
          sigmask?*(const T_sigset *)sigmask:(T_sigset)0,
          result);
#else
   Debug5("pselect -> (, 0x%lx, 0x%lx, 0x%lx), " F_sigset ", %d",
          readfds?readfds->__fds_bits[0]:0, writefds?writefds->__fds_bits[0]:0,
          exceptfds?exceptfds->__fds_bits[0]:0,
          sigmask?*(const T_sigset *)sigmask:(T_sigset)0,
          result);
#endif
#endif /* WITH_SYCLS */
   errno = _errno;
   return result;
}
#endif /* HAVE_PSELECT */
#if WITH_SYCLS
pid_t Fork(void) {

View file

@ -85,6 +85,8 @@ int Chmod(const char *path, mode_t mode);
int Poll(struct pollfd *ufds, unsigned int nfds, int timeout);
int Select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
struct timeval *timeout);
/* tracing wrapper around pselect(), same arguments and return value.
   NOTE(review): the definition is compiled only under #if HAVE_PSELECT while
   this prototype is unconditional - callers must check HAVE_PSELECT */
int Pselect(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
const struct timespec *timeout, const sigset_t *sigmask);
#if WITH_SYCLS
pid_t Fork(void);
#endif /* WITH_SYCLS */

View file

@ -1389,27 +1389,27 @@ int _xioopen_dgram_recvfrom(struct single *xfd, int xioflags,
xfd->salen = palen;
if (dofork) {
sigset_t mask_sigchldusr1;
sigset_t oldset, mask_sigchldusr1;
/* we must prevent the current packet from triggering another fork;
therefore we wait for a signal from the most recent child: USR1
indicates that it has consumed the last packet; CHLD means it has
terminated */
/* block SIGCHLD and SIGUSR1 until parent is ready to react */
sigemptyset(&mask_sigchldusr1);
Sigprocmask(SIG_BLOCK, NULL, &mask_sigchldusr1);
sigaddset(&mask_sigchldusr1, SIGCHLD);
sigaddset(&mask_sigchldusr1, SIGUSR1);
Sigprocmask(SIG_BLOCK, &mask_sigchldusr1, NULL);
Sigprocmask(SIG_SETMASK, &mask_sigchldusr1, &oldset);
if ((pid = xio_fork(false, level)) < 0) {
Close(xfd->fd);
Sigprocmask(SIG_UNBLOCK, &mask_sigchldusr1, NULL);
Sigprocmask(SIG_SETMASK, &oldset, NULL);
return STAT_RETRYLATER;
}
if (pid == 0) { /* child */
/* no reason to block SIGCHLD in child process */
Sigprocmask(SIG_UNBLOCK, &mask_sigchldusr1, NULL);
Sigprocmask(SIG_SETMASK, &oldset, NULL);
xfd->ppid = Getppid(); /* send parent a signal when packet has
been consumed */
@ -1431,12 +1431,21 @@ int _xioopen_dgram_recvfrom(struct single *xfd, int xioflags,
/* server: continue loop with listen */
xio_waitingfor = pid;
#if HAVE_PSELECT
{
struct timespec timeout = { LONG_MAX, 0 };
Pselect(0, NULL, NULL, NULL, &timeout, &oldset);
Sigprocmask(SIG_SETMASK, &oldset, NULL);
}
#else /* ! HAVE_PSELECT */
/* now we are ready to handle signals */
Sigprocmask(SIG_UNBLOCK, &mask_sigchldusr1, NULL);
Sigprocmask(SIG_SETMASK, &oldset, NULL);
while (!xio_hashappened) {
Sleep(UINT_MAX); /* any signal lets us continue */
Sleep(1); /* any signal speeds up return */
}
#endif /* ! HAVE_PSELECT */
xio_waitingfor = 0; /* so this child will not set hashappened again */
xio_hashappened = false;