summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoerg Jaspert <joerg@debian.org>2009-06-22 00:57:08 +0200
committerJoerg Jaspert <joerg@debian.org>2009-06-22 00:57:08 +0200
commit3f719eb4922055bdeb6755f80128e743661d0bcb (patch)
tree5463745b990d5f6bd4b973823d3c9cb857a1508f
parent0c0529ba42e7b23afae1006b01d46e0a8fa09913 (diff)
websync
initial version of a script to sync the debian website with. Signed-off-by: Joerg Jaspert <joerg@debian.org>
-rwxr-xr-xbin/websync305
-rw-r--r--etc/websync.conf121
2 files changed, 426 insertions, 0 deletions
diff --git a/bin/websync b/bin/websync
new file mode 100755
index 0000000..7578a9c
--- /dev/null
+++ b/bin/websync
@@ -0,0 +1,305 @@
+#! /bin/bash
+# No, we can not deal with sh alone.
+
+# Abort on the first failing command (-e) and on any use of an unset
+# variable (-u). -E lets functions, command substitutions and subshells
+# inherit the ERR trap; the function case is the one that matters here.
+set -euE
+
+# websync script for Debian
+# Based loosely on the old websync written by an
+# unknown number of different people over the years and ftpsync.
+#
+# Copyright (C) 2008,2009 Joerg Jaspert <joerg@debian.org>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; version 2.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+# In case the admin somehow wants to have this script located someplace else,
+# he can set BASEDIR, and we will take that. If it is unset we take ${HOME}
+# How the admin sets this isn't our place to deal with. One could use a wrapper
+# for that. Or pam_env. Or whatever fits in the local setup. :)
+BASEDIR=${BASEDIR:-"${HOME}"}
+
+# Script version. DO NOT CHANGE, *unless* you change the master copy maintained
+# by Joerg Jaspert and the Debian mirroradm group.
+# This is used to track which mirror is using which script version.
+VERSION="0815"
+
+# Source our common functions
+. "${BASEDIR}/etc/common"
+
+########################################################################
+########################################################################
+## functions ##
+########################################################################
+########################################################################
+# All the stuff we want to do when we exit, no matter where
+cleanup() {
+ trap - ERR TERM HUP INT QUIT EXIT
+ # all done. Mail the log, exit.
+ log "Mirrorsync done";
+ if [ -n "${MAILTO}" ]; then
+ # In case rsync had something on stderr
+ if [ -s "${LOGDIR}/rsync-${NAME}.error" ]; then
+ mail -e -s "[${PROGRAM}@$(hostname -s)] ($$) rsync ERROR on $(date +"%Y.%m.%d-%H:%M:%S")" ${MAILTO} < "${LOGDIR}/rsync-${NAME}.error"
+ fi
+ if [ "x${ERRORSONLY}x" = "xfalsex" ]; then
+ # And the normal log
+ MAILFILES="${LOG}"
+ if [ "x${FULLLOGS}x" = "xtruex" ]; then
+ # Someone wants full logs including rsync
+ MAILFILES="${MAILFILES} ${LOGDIR}/rsync-${NAME}.log"
+ fi
+ cat ${MAILFILES} | mail -e -s "[${PROGRAM}@$(hostname -s)] web sync finished on $(date +"%Y.%m.%d-%H:%M:%S")" ${MAILTO}
+ fi
+ fi
+
+ savelog "${LOGDIR}/rsync-${NAME}.log"
+ savelog "${LOGDIR}/rsync-${NAME}.error"
+ savelog "$LOG" > /dev/null
+
+ rm -f "${LOCK}"
+}
+
+
+# Check rsync's return value and turn it into a verdict for the caller.
+#
+# Arguments:
+#   $1 - exit code of the rsync run
+#   $2 - message passed to error() for exit codes not listed below
+# Returns:
+#   0 - rsync exit code 0 or 24
+#   2 - rsync exit code 23 or 30; the sync loop ignores these only while
+#       another update is already pending
+#   1 - any other exit code, after $2 has been reported through error()
+check_rsync() {
+    # Keep both values local so they do not leak into the global scope.
+    local ret="$1"
+    local msg="$2"
+
+    # 24 - vanished source files. Ignored, that should be the target of $UPDATEREQUIRED
+    # and us re-running. If it's not, uplink is broken anyways.
+    case "${ret}" in
+        0) return 0;;
+        24) return 0;;
+        23) return 2;;
+        30) return 2;;
+        *)
+            error "ERROR: ${msg}"
+            return 1
+            ;;
+    esac
+}
+
+########################################################################
+########################################################################
+
+# As what are we called? The name picks the config file sourced below and
+# becomes part of the logfile names. $0 is quoted so a path containing
+# whitespace cannot break basename.
+NAME="$(basename "$0")"
+
+# Now source the config.
+. "${BASEDIR}/etc/${NAME}.conf"
+
+########################################################################
+# Config options go here. Feel free to overwrite them in the config #
+# file if you need to. #
+# On debian.org machines the defaults should be ok. #
+########################################################################
+
+########################################################################
+# There should be nothing to edit here, use the config file #
+########################################################################
+# Full hostname of this mirror; part of the lock, update-required and
+# trace file names below.
+MIRRORNAME="${MIRRORNAME:-$(hostname -f)}"
+# Directory for all logfiles
+LOGDIR="${LOGDIR:-${BASEDIR}/log}"
+# The logfile of this script itself
+LOG="${LOG:-${LOGDIR}/${NAME}.log}"
+
+# Destination directory for the mirrored files
+TO="${TO:-/org/www.debian.org/www}"
+
+# used by log() and error()
+PROGRAM="${PROGRAM:-${NAME}-$(hostname -s)}"
+
+# Recipient of the mails about mirroring. The default depends on whether
+# the short hostname equals MIRRORNAME with a trailing .debian.org removed.
+if [ "x$(hostname -s)x" = "x${MIRRORNAME%%.debian.org}x" ]; then
+    # Yay, on a .debian.org host
+    MAILTO="${MAILTO:-mirrorlogs@debian.org}"
+else
+    # We are not on a debian.org host
+    MAILTO="${MAILTO:-root}"
+fi
+# Mail on errors only ("true") or after every run ("false")?
+ERRORSONLY="${ERRORSONLY:-true}"
+# Should the mailed log include the rsync log as well?
+FULLLOGS="${FULLLOGS:-false}"
+
+# Number of rotated logfiles to keep
+LOGROTATE="${LOGROTATE:-14}"
+
+# Lockfile, exists while a sync is running
+LOCK="${LOCK:-${TO}/Website-Update-in-Progress-${MIRRORNAME}}"
+# Marker file: as long as it exists another rsync run is needed
+UPDATEREQUIRED="${TO}/Website-Update-Required-${MIRRORNAME}"
+# Trace file for mirror stats and checks (make sure we get full hostname)
+TRACE="${TRACE:-.project/trace/${MIRRORNAME}}"
+
+# rsync program
+RSYNC=${RSYNC:-rsync}
+# Rsync filter rules. Used to protect various files we always want to keep, even if we otherwise delete
+# excluded files
+RSYNC_FILTER=${RSYNC_FILTER:-"--filter=protect_Website-Update-in-Progress-${MIRRORNAME} --filter=protect_${TRACE} --filter=protect_Website-Update-Required-${MIRRORNAME}"}
+# Default rsync options for *every* rsync call
+RSYNC_OPTIONS=${RSYNC_OPTIONS:-"-rltvHSB8192 --timeout 3600 --stats ${RSYNC_FILTER}"}
+RSYNC_OPTIONS2=${RSYNC_OPTIONS2:-"--max-delete=40000 --delay-updates --delete --delete-after --delete-excluded"}
+# Which rsync share to use on our upstream mirror?
+RSYNC_PATH=${RSYNC_PATH:-"web.debian.org"}
+
+# our username for the rsync share
+RSYNC_USER=${RSYNC_USER:-""}
+# the password
+RSYNC_PASSWORD=${RSYNC_PASSWORD:-""}
+
+# a possible proxy
+RSYNC_PROXY=${RSYNC_PROXY:-""}
+
+# General excludes.
+EXCLUDE=${EXCLUDE:-"${HOSTNAME}"}
+
+# The temp directory used by rsync --delay-updates is not
+# world-readable remotely. Always exclude it to avoid errors.
+EXCLUDE="${EXCLUDE} --exclude .~tmp~/"
+
+# And site specific excludes, by default its the sponsor stuff that should be local to all
+SITE_EXCLUDE=${SITE_EXCLUDE:-"--exclude core --exclude sponsor_img.jpg --exclude sponsor.html"}
+
+# Hook scripts, empty means "no hook at this point"
+: "${HOOK1:=}"
+: "${HOOK2:=}"
+: "${HOOK3:=}"
+: "${HOOK4:=}"
+
+# "true" if slave mirrors have to be triggered after our own sync
+: "${HUB:=false}"
+
+# Some sane defaults
+cd "${BASEDIR}"
+umask 022
+
+# If we are here for the first time, create the
+# destination and the trace directory
+mkdir -p "${TO}/.project/trace"
+
+# Used to make sure we will have the archive fully and completly synced before
+# we stop, even if we get multiple pushes while this script is running.
+# Otherwise we can end up with a half-synced archive:
+# - get a push
+# - sync, while locked
+# - get another push. Of course no extra sync run then happens, we are locked.
+# - done. Archive not correctly synced, we don't have all the changes from the second push.
+touch "${UPDATEREQUIRED}"
+
+# Check to see if another sync is in progress.
+# With noclobber set the redirection fails if the lockfile already
+# exists, so creating the file doubles as the test. The subshell keeps
+# noclobber from leaking into the rest of the script.
+if ! ( set -o noclobber; echo "$$" > "${LOCK}") 2> /dev/null; then
+    # A lockfile exists. kill -0 only probes the recorded PID, no signal
+    # is delivered.
+    if ! kill -0 "$(cat "${LOCK}")" 2>/dev/null; then
+        # Process does either not exist or is not owned by us.
+        echo "$$" > "${LOCK}"
+    else
+        echo "Unable to start rsync, lock file still exists, PID $(cat "${LOCK}")"
+        exit 1
+    fi
+fi
+
+trap cleanup EXIT ERR TERM HUP INT QUIT
+
+# Start log by redirecting everything there.
+exec >"$LOG" 2>&1 </dev/null
+
+# Look who pushed us and note that in the log.
+log "Mirrorsync start"
+PUSHFROM="${SSH_CONNECTION%%\ *}"
+if [ -n "${PUSHFROM}" ]; then
+ log "We got pushed from ${PUSHFROM}"
+fi
+log "Acquired main lock"
+
+# Hook 1: the lock is held, nothing has been synced yet.
+# HOOK is a bash array of VAR=value words describing the hook to run.
+# hook() is not defined in this file (presumably it comes from etc/common).
+# NOTE(review): "$HOOK" expands to the first array element only - confirm
+# that hook() picks up the rest from the global HOOK array.
+HOOK=(
+ HOOKNR=1
+ HOOKSCR=${HOOK1}
+)
+hook $HOOK
+
+# Now, we might want to sync from anonymous too: without RSYNC_USER the
+# source is just the host, otherwise user@host.
+# This is that deep in this script so hook1 could, if wanted, change things!
+# RSYNC_USER is quoted so the test stays a well-formed one-argument -z
+# check whatever the value contains.
+if [ -z "${RSYNC_USER}" ]; then
+    RSYNCPTH="${RSYNC_HOST}"
+else
+    RSYNCPTH="${RSYNC_USER}@${RSYNC_HOST}"
+fi
+
+# Now do the actual mirroring, and run as long as we have an updaterequired file.
+# rsync picks both settings up from its environment.
+export RSYNC_PASSWORD
+export RSYNC_PROXY
+
+while [ -e "${UPDATEREQUIRED}" ]; do
+ log "Running mirrorsync, update is required, ${UPDATEREQUIRED} exists"
+
+ rm -f "${UPDATEREQUIRED}"
+ log "Syncing: ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS2} ${EXCLUDE} ${SOURCE_EXCLUDE} ${RSYNCPTH}::${RSYNC_PATH} ${TO}"
+
+ set +e
+ ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS2} ${EXCLUDE} ${SOURCE_EXCLUDE} \
+ ${RSYNCPTH}::${RSYNC_PATH} "${TO}" >"${LOGDIR}/rsync-${NAME}.log" 2>"${LOGDIR}/rsync-${NAME}.error"
+ result=$?
+ set -e
+
+ log "Back from rsync with returncode ${result}"
+
+ set +e
+ check_rsync $result "Sync went wrong, got errorcode ${result}. Logfile: ${LOG}"
+ GO=$?
+ set -e
+
+ if [ ${GO} -eq 2 ] && [ -e "${UPDATEREQUIRED}" ]; then
+ log "We got error ${result} from rsync, but a second push went in hence ignoring this error for now"
+ elif [ ${GO} -ne 0 ]; then
+ exit 3
+ fi
+
+ HOOK=(
+ HOOKNR=2
+ HOOKSCR=${HOOK2}
+ )
+ hook $HOOK
+
+done
+
+# Record when and with which script version this sync finished. Skipped
+# if the directory for the trace file does not exist.
+if [ -d "$(dirname "${TO}/${TRACE}")" ]; then
+    {
+        LC_ALL=POSIX LANG=POSIX date -u
+        echo "Used websync version: ${VERSION}"
+        echo "Running on host: $(hostname -f)"
+    } > "${TO}/${TRACE}"
+fi
+
+# Hook 3: the sync loop has ended and the trace file step is done.
+# HOOK is a bash array of VAR=value words describing the hook to run.
+# hook() is not defined in this file (presumably it comes from etc/common).
+# NOTE(review): "$HOOK" expands to the first array element only - confirm
+# that hook() picks up the rest from the global HOOK array.
+HOOK=(
+ HOOKNR=3
+ HOOKSCR=${HOOK3}
+)
+hook $HOOK
+
+# On a hub, trigger the slave mirrors once the sync loop has finished.
+# HUB and the runmirrors path are quoted so values containing whitespace
+# can neither break the test nor split the command name.
+if [ "x${HUB}" = "xtrue" ]; then
+    log "Trigger slave mirrors"
+    "${BASEDIR}/bin/runmirrors" "websync"
+    log "Trigger slave done"
+
+    # Hook 4: only reached on a hub, after the slaves were triggered.
+    HOOK=(
+        HOOKNR=4
+        HOOKSCR=${HOOK4}
+    )
+    hook $HOOK
+fi
+
+# All done, rest is done by cleanup hook.
diff --git a/etc/websync.conf b/etc/websync.conf
new file mode 100644
index 0000000..498ac08
--- /dev/null
+++ b/etc/websync.conf
@@ -0,0 +1,121 @@
+########################################################################
+########################################################################
+## This is a sample configuration file for the websync mirror script. ##
+## Most of the values are commented out and just shown here for ##
+## completeness, together with their default value. ##
+########################################################################
+########################################################################
+
+## Mirrorname. This is used for things like the trace file and should always
+## be the full hostname of the mirror.
+#MIRRORNAME=${MIRRORNAME:-`hostname -f`}
+
+## Destination of the mirrored files. Should be an empty directory.
+## CAREFUL, this directory will contain the mirror. Everything else
+## that might have happened to be in there WILL BE GONE after the mirror sync!
+#TO=${TO:-"/org/www.debian.org/www"}
+
+## The upstream name of the rsync share.
+#RSYNC_PATH="web.debian.org"
+
+## The host we mirror from
+#RSYNC_HOST=some.mirror.debian.org
+
+## In case we need a user to access the rsync share at our upstream host
+#RSYNC_USER=
+
+## If we need a user we also need a password
+#RSYNC_PASSWORD=
+
+## In which directory should logfiles end up
+## Note that BASEDIR defaults to $HOME, but can be set before calling the
+## websync script to any value you want (for example using pam_env)
+#LOGDIR="${BASEDIR}/log"
+
+## Name of our own logfile.
+## Note that ${NAME} is set by the websync script
+#LOG="${LOGDIR}/${NAME}.log"
+
+## The script can send logs (or error messages) to a mail address.
+## If this is unset it will default to the local root user unless it is run
+## on a .debian.org machine where it will default to the mirroradm people.
+#MAILTO="root"
+
+## If you want a mail about every single sync, set this to false.
+## With any other value mails are only sent if a mirror sync fails.
+#ERRORSONLY="true"
+
+## If you want the logs to also include output of rsync, set this to true.
+## Careful, the logs can get pretty big, especially if it is the first mirror
+## run
+#FULLLOGS="false"
+
+## If you do want to exclude files from the mirror run, put --exclude statements here.
+## See rsync(1) for the exact syntax, these are passed to rsync as written here.
+## Please do not use this except for rare cases and after you talked to us.
+## For the sponsor logos see SITE_EXCLUDE
+#EXCLUDE=${EXCLUDE:-"${HOSTNAME}"}
+
+## And site specific excludes, by default it's the sponsor stuff that should be local to all
+#SITE_EXCLUDE=${SITE_EXCLUDE:-"--exclude core --exclude sponsor_img.jpg --exclude sponsor.html"}
+
+## Do we have leaf mirrors that we need to signal once we are done, so they sync too?
+## If so set it to true and make sure you configure runmirrors-websync.mirrors
+## and runmirrors-websync.conf for your need.
+#HUB=false
+
+## We do create three logfiles for every run. To save space we rotate them, this
+## defines how many we keep
+#LOGROTATE=14
+
+## Our own lockfile (only one sync should run at any time)
+#LOCK="${TO}/Website-Update-in-Progress-${MIRRORNAME}"
+
+## The following file is used to make sure we will end up with a correctly
+## synced mirror even if we get multiple pushes in a short timeframe
+#UPDATEREQUIRED="${TO}/Website-Update-Required-${MIRRORNAME}"
+
+## The trace file is used by a mirror check tool to see when we last
+## had a successful mirror sync. Make sure that it always ends up in
+## .project/trace and always shows the full hostname.
+## This is *relative* to ${TO}
+#TRACE=".project/trace/${MIRRORNAME}"
+
+## We sync our mirror using rsync (everything else would be insane), so
+## we need a few options set.
+## The rsync program
+#RSYNC=rsync
+
+## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T!
+## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T!
+## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T!
+## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T!
+
+## Default rsync options every rsync invocation sees.
+#RSYNC_OPTIONS="-rltvHSB8192 --timeout 3600 --stats --filter=protect_Website-Update-in-Progress-${MIRRORNAME} --filter=protect_${TRACE} --filter=protect_Website-Update-Required-${MIRRORNAME}"
+
+## Default rsync options
+#RSYNC_OPTIONS2=${RSYNC_OPTIONS2:-"--max-delete=40000 --delay-updates --delete --delete-after --delete-excluded"}
+
+## You may establish the connection via a web proxy by setting the environment
+## variable RSYNC_PROXY to a hostname:port pair pointing to your web proxy. Note
+## that your web proxy's configuration must support proxy connections to port 873.
+# RSYNC_PROXY=
+
+## Hook scripts can be run at various places during the sync.
+## Leave them blank if you don't want any
+## Hook1: After lock is acquired, before the first rsync run
+## Hook2: After each rsync run in the sync loop, unless the run failed and
+##        the script gives up
+## Hook3: After the sync loop has finished and the trace file was handled
+## Hook4: After the leaf mirrors were triggered, only if we have slave
+##        mirrors (HUB=true)
+## Hook5: Not used by the websync script.
+##
+## Note that Hook2 is called after *every* rsync run, so it also runs when
+## the mirroring needs to re-run thanks to a second push.
+## Hook3 is only called once we are done with mirroring.
+#HOOK1=
+#HOOK2=
+#HOOK3=
+#HOOK4=
+#HOOK5=