Commit 18d5be5b authored by Julius Goryavsky's avatar Julius Goryavsky

MDEV-25880: rsync may be mistakenly killed when overlapping SST

This commit fixes a bug was originally discovered during the
galera_nbo_sst_slave mtr test for 10.6 branch. However it is
relevant for all versions and can lead to intermittent SST
crashes via rsync on very fast server restarts - when a new
SST process (for example, after starting a new server instance)
overlaps the old SST process started by the previous, already
terminated server. This overlap can result in the new rsync
being killed instead of the old rsync, or the pid file from
the new rsync being killed, which then lead to problems.
parent 1c35a3f6
......@@ -1190,7 +1190,6 @@ trim_string()
check_pid()
{
local pid_file="$1"
local remove=${2:-0}
if [ -r "$pid_file" ]; then
local pid=$(cat "$pid_file" 2>/dev/null)
if [ -n "$pid" ]; then
......@@ -1201,6 +1200,7 @@ check_pid()
fi
fi
fi
local remove=${2:-0}
if [ $remove -eq 1 ]; then
rm -f "$pid_file"
fi
......
......@@ -68,6 +68,8 @@ cleanup_joiner()
if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then
wsrep_cleanup_progress_file
fi
[ -f "$SST_PID" ] && rm -f "$SST_PID"
}
check_pid_and_port()
......@@ -281,6 +283,7 @@ then
*)
wsrep_log_error "Unrecognized ssl-mode option: '$SSLMODE'"
exit 22 # EINVAL
;;
esac
if [ -z "$CAFILE_OPT" ]; then
wsrep_log_error "Can't have ssl-mode='$SSLMODE' without CA file"
......@@ -499,6 +502,21 @@ elif [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]
then
check_sockets_utils
SST_PID="$WSREP_SST_OPT_DATA/wsrep_rsync_sst.pid"
# give some time for lingering stunnel from previous SST to complete
check_round=0
while check_pid "$SST_PID" 0
do
wsrep_log_info "previous SST not completed, waiting for it to exit"
check_round=$(( check_round + 1 ))
if [ $check_round -eq 10 ]; then
wsrep_log_error "SST script already running."
exit 114 # EALREADY
fi
sleep 1
done
# give some time for lingering stunnel from previous SST to complete
check_round=0
while check_pid "$STUNNEL_PID" 1
......@@ -583,12 +601,14 @@ EOF
RSYNC_ADDR="*"
fi
echo $$ > "$SST_PID"
if [ -z "$STUNNEL" ]
then
rsync --daemon --no-detach --port "$RSYNC_PORT" --config "$RSYNC_CONF" $RSYNC_EXTRA_ARGS &
RSYNC_REAL_PID=$!
TRANSFER_REAL_PID="$RSYNC_REAL_PID"
TRANSFER_PID=$RSYNC_PID
TRANSFER_REAL_PID=$RSYNC_REAL_PID
TRANSFER_PID="$RSYNC_PID"
else
# Let's check if the path to the config file contains a space?
if [ "${RSYNC_CONF#* }" = "$RSYNC_CONF" ]; then
......@@ -631,8 +651,8 @@ EOF
fi
stunnel "$STUNNEL_CONF" &
STUNNEL_REAL_PID=$!
TRANSFER_REAL_PID="$STUNNEL_REAL_PID"
TRANSFER_PID=$STUNNEL_PID
TRANSFER_REAL_PID=$STUNNEL_REAL_PID
TRANSFER_PID="$STUNNEL_PID"
fi
if [ "${SSLMODE#VERIFY}" != "$SSLMODE" ]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment