diff options
author | Joerg Jaspert <joerg@debian.org> | 2009-06-22 00:57:08 +0200 |
---|---|---|
committer | Joerg Jaspert <joerg@debian.org> | 2009-06-22 00:57:08 +0200 |
commit | 3f719eb4922055bdeb6755f80128e743661d0bcb (patch) | |
tree | 5463745b990d5f6bd4b973823d3c9cb857a1508f | |
parent | 0c0529ba42e7b23afae1006b01d46e0a8fa09913 (diff) |
websync
initial version of a script to sync the debian website with.
Signed-off-by: Joerg Jaspert <joerg@debian.org>
-rwxr-xr-x | bin/websync | 305 | ||||
-rw-r--r-- | etc/websync.conf | 121 |
2 files changed, 426 insertions, 0 deletions
diff --git a/bin/websync b/bin/websync
new file mode 100755
index 0000000..7578a9c
--- /dev/null
+++ b/bin/websync
@@ -0,0 +1,305 @@
+#! /bin/bash
+# No, we can not deal with sh alone.
+
+set -e
+set -u
+# ERR traps should be inherited from functions too. (And command
+# substitutions and subshells and whatnot, but for us the function is
+# the important part here)
+set -E
+
+# websync script for Debian
+# Based loosely on the old websync written by an
+# unknown number of different people over the years and ftpsync.
+#
+# Copyright (C) 2008,2009 Joerg Jaspert <joerg@debian.org>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; version 2.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+# In case the admin somehow wants to have this script located someplace else,
+# he can set BASEDIR, and we will take that. If it is unset we take ${HOME}
+# How the admin sets this isn't our place to deal with. One could use a wrapper
+# for that. Or pam_env. Or whatever fits in the local setup. :)
+BASEDIR=${BASEDIR:-"${HOME}"}
+
+# Script version. DO NOT CHANGE, *unless* you change the master copy maintained
+# by Joerg Jaspert and the Debian mirroradm group.
+# This is used to track which mirror is using which script version.
+VERSION="0815"
+
+# Source our common functions
+. "${BASEDIR}/etc/common"
+
+########################################################################
+########################################################################
+## functions                                                          ##
+########################################################################
+########################################################################
+# All the stuff we want to do when we exit, no matter where: mail the
+# logs if wanted, rotate the three logfiles and remove the lock file.
+cleanup() {
+    # Reset all traps first, so cleanup is not triggered a second time.
+    trap - ERR TERM HUP INT QUIT EXIT
+    log "Mirrorsync done"
+    if [ -n "${MAILTO}" ]; then
+        # In case rsync had something on stderr (MAILTO is not quoted, so it may hold a list)
+        if [ -s "${LOGDIR}/rsync-${NAME}.error" ]; then
+            mail -e -s "[${PROGRAM}@$(hostname -s)] ($$) rsync ERROR on $(date +"%Y.%m.%d-%H:%M:%S")" ${MAILTO} < "${LOGDIR}/rsync-${NAME}.error"
+        fi
+        if [ "x${ERRORSONLY}x" = "xfalsex" ]; then
+            # And the normal log. An array, so paths with spaces stay in one piece.
+            local -a mailfiles=("${LOG}")
+            if [ "x${FULLLOGS}x" = "xtruex" ]; then
+                # Someone wants full logs including rsync
+                mailfiles+=("${LOGDIR}/rsync-${NAME}.log")
+            fi
+            cat "${mailfiles[@]}" | mail -e -s "[${PROGRAM}@$(hostname -s)] web sync finished on $(date +"%Y.%m.%d-%H:%M:%S")" ${MAILTO}
+        fi
+    fi
+
+    savelog "${LOGDIR}/rsync-${NAME}.log"
+    savelog "${LOGDIR}/rsync-${NAME}.error"
+    savelog "$LOG" > /dev/null
+
+    rm -f "${LOCK}"
+}
+
+# Check rsyncs return value.
+# $1: exit code of rsync, $2: message handed to error() for fatal codes.
+# Returns 0 for codes 0/24, 2 for codes 23/30, 1 (after error()) otherwise.
+check_rsync() {
+    local ret=$1
+    local msg=$2
+    # 24 - vanished source files. Ignored, that should be the target of $UPDATEREQUIRED
+    # and us re-running. If it's not, uplink is broken anyways.
+    case "${ret}" in
+        0) return 0;;
+        24) return 0;;
+        23) return 2;;
+        30) return 2;;
+        *)
+            error "ERROR: ${msg}"
+            return 1
+            ;;
+    esac
+}
+
+########################################################################
+########################################################################
+
+# As what are we called?
+NAME="$(basename "$0")"
+
+# Now source the config.
+. "${BASEDIR}/etc/${NAME}.conf"
+
+########################################################################
+# Config options go here. 
Feel free to overwrite them in the config    #
+# file if you need to.                                                 #
+# On debian.org machines the defaults should be ok.                    #
+########################################################################
+
+########################################################################
+# There should be nothing to edit here, use the config file            #
+########################################################################
+MIRRORNAME=${MIRRORNAME:-$(hostname -f)}
+# Directory all the logfiles end up in
+LOGDIR=${LOGDIR:-"${BASEDIR}/log"}
+# The logfile of this script itself
+LOG=${LOG:-"${LOGDIR}/${NAME}.log"}
+
+# Target directory for all the mirrored files
+TO=${TO:-"/org/www.debian.org/www"}
+
+# used by log() and error()
+PROGRAM=${PROGRAM:-"${NAME}-$(hostname -s)"}
+
+# Who gets the mails about mirroring?
+if [[ "$(hostname -s)" == "${MIRRORNAME%%.debian.org}" ]]; then
+    # Yay, on a .debian.org host
+    MAILTO=${MAILTO:-"mirrorlogs@debian.org"}
+else
+    # We are not on a debian.org host
+    MAILTO=${MAILTO:-"root"}
+fi
+# Mail only when there are errors, or mail every log?
+ERRORSONLY=${ERRORSONLY:-"true"}
+# Should the mailed log also contain the rsync log?
+FULLLOGS=${FULLLOGS:-"false"}
+
+# Number of logfiles we keep around
+LOGROTATE=${LOGROTATE:-14}
+
+# Our lockfile
+LOCK=${LOCK:-"${TO}/Website-Update-in-Progress-${MIRRORNAME}"}
+# As long as this file exists we need another rsync run
+UPDATEREQUIRED="${TO}/Website-Update-Required-${MIRRORNAME}"
+# Trace file for mirror stats and checks (make sure we get full hostname)
+TRACE=${TRACE:-".project/trace/${MIRRORNAME}"}
+
+# rsync program
+RSYNC=${RSYNC:-rsync}
+# Rsync filter rules. 
Used to protect various files we always want to keep, even if we otherwise delete
+# excluded files
+RSYNC_FILTER=${RSYNC_FILTER:-"--filter=protect_Website-Update-in-Progress-${MIRRORNAME} --filter=protect_${TRACE} --filter=protect_Website-Update-Required-${MIRRORNAME}"}
+# Default rsync options for *every* rsync call
+RSYNC_OPTIONS=${RSYNC_OPTIONS:-"-rltvHSB8192 --timeout 3600 --stats ${RSYNC_FILTER}"}
+RSYNC_OPTIONS2=${RSYNC_OPTIONS2:-"--max-delete=40000 --delay-updates --delete --delete-after --delete-excluded"}
+# Which rsync share to use on our upstream mirror?
+RSYNC_PATH=${RSYNC_PATH:-"web.debian.org"}
+
+# our username for the rsync share
+RSYNC_USER=${RSYNC_USER:-""}
+# the password
+RSYNC_PASSWORD=${RSYNC_PASSWORD:-""}
+
+# a possible proxy
+RSYNC_PROXY=${RSYNC_PROXY:-""}
+
+# General excludes. This ends up on the rsync command line as it is, so it
+# has to consist of complete --exclude options, a bare name would be a source.
+EXCLUDE=${EXCLUDE:-"--exclude ${HOSTNAME}"}
+# The temp directory used by rsync --delay-updates is not
+# world-readable remotely. Always exclude it to avoid errors.
+EXCLUDE="${EXCLUDE} --exclude .~tmp~/"
+
+# And site specific excludes, by default its the sponsor stuff that should be local to all
+SITE_EXCLUDE=${SITE_EXCLUDE:-"--exclude core --exclude sponsor_img.jpg --exclude sponsor.html"}
+
+# Hooks
+HOOK1=${HOOK1:-""}
+HOOK2=${HOOK2:-""}
+HOOK3=${HOOK3:-""}
+HOOK4=${HOOK4:-""}
+
+# Are we a hub?
+HUB=${HUB:-"false"}
+
+# Some sane defaults
+cd "${BASEDIR}"
+umask 022
+
+# If we are here for the first time, create the
+# destination and the trace directory
+mkdir -p "${TO}/.project/trace"
+
+# Used to make sure we will have the archive fully and completely synced before
+# we stop, even if we get multiple pushes while this script is running.
+# Otherwise we can end up with a half-synced archive:
+# - get a push
+# - sync, while locked
+# - get another push. Of course no extra sync run then happens, we are locked.
+# - done. Archive not correctly synced, we don't have all the changes from the second push.
+touch "${UPDATEREQUIRED}"
+
+# Check to see if another sync is in progress
+if ! ( set -o noclobber; echo "$$" > "${LOCK}") 2> /dev/null; then
+    if ! kill -0 "$(cat "${LOCK}")" 2>/dev/null; then
+        # Process does either not exist or is not owned by us.
+        echo "$$" > "${LOCK}"
+    else
+        echo "Unable to start rsync, lock file still exists, PID $(cat "${LOCK}")"
+        exit 1
+    fi
+fi
+
+trap cleanup EXIT ERR TERM HUP INT QUIT
+
+# Start log by redirecting everything there.
+exec >"$LOG" 2>&1 </dev/null
+
+# Look who pushed us and note that in the log. SSH_CONNECTION may be unset (set -u!).
+log "Mirrorsync start"
+PUSHFROM="${SSH_CONNECTION:-}"
+PUSHFROM="${PUSHFROM%%\ *}"
+if [ -n "${PUSHFROM}" ]; then
+    log "We got pushed from ${PUSHFROM}"
+fi
+log "Acquired main lock"
+
+# NOTE(review): $HOOK is only the first array element (HOOKNR=..), verify against hook() in etc/common
+HOOK=(
+    HOOKNR=1
+    HOOKSCR=${HOOK1}
+)
+hook $HOOK
+
+# Now, we might want to sync from anonymous too.
+# This is that deep in this script so hook1 could, if wanted, change things!
+RSYNCPTH="${RSYNC_HOST:?RSYNC_HOST is not set, please check the config}"
+if [ -n "${RSYNC_USER}" ]; then
+    RSYNCPTH="${RSYNC_USER}@${RSYNCPTH}"
+fi
+
+# Now do the actual mirroring, and run as long as we have an updaterequired file.
+export RSYNC_PASSWORD RSYNC_PROXY
+
+while [ -e "${UPDATEREQUIRED}" ]; do
+    log "Running mirrorsync, update is required, ${UPDATEREQUIRED} exists"
+
+    rm -f "${UPDATEREQUIRED}"
+    log "Syncing: ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS2} ${EXCLUDE} ${SITE_EXCLUDE} ${SOURCE_EXCLUDE:-} ${RSYNCPTH}::${RSYNC_PATH} ${TO}"
+
+    # Collect the return code with "|| result=$?". That keeps a failing rsync away
+    # from set -e and from the ERR trap (cleanup), which also fires under set +e.
+    result=0
+    ${RSYNC} ${RSYNC_OPTIONS} ${RSYNC_OPTIONS2} ${EXCLUDE} ${SITE_EXCLUDE} ${SOURCE_EXCLUDE:-} \
+        ${RSYNCPTH}::${RSYNC_PATH} "${TO}" >"${LOGDIR}/rsync-${NAME}.log" 2>"${LOGDIR}/rsync-${NAME}.error" || result=$?
+
+    log "Back from rsync with returncode ${result}"
+
+    # check_rsync answers with its return code (0 fine, 2 maybe fine, 1 fatal),
+    # so collect that the same way instead of letting it trigger the ERR trap.
+    GO=0
+    check_rsync "${result}" "Sync went wrong, got errorcode ${result}. Logfile: ${LOG}" || GO=$?
+
+    if [ "${GO}" -eq 2 ] && [ -e "${UPDATEREQUIRED}" ]; then
+        log "We got error ${result} from rsync, but a second push went in hence ignoring this error for now"
+    elif [ "${GO}" -ne 0 ]; then
+        exit 3
+    fi
+
+    HOOK=(
+        HOOKNR=2
+        HOOKSCR=${HOOK2}
+    )
+    hook $HOOK
+
+done
+
+if [ -d "$(dirname "${TO}/${TRACE}")" ]; then
+    LC_ALL=POSIX LANG=POSIX date -u > "${TO}/${TRACE}"
+    echo "Used websync version: ${VERSION}" >> "${TO}/${TRACE}"
+    echo "Running on host: $(hostname -f)" >> "${TO}/${TRACE}"
+fi
+
+HOOK=(
+    HOOKNR=3
+    HOOKSCR=${HOOK3}
+)
+hook $HOOK
+
+if [ "x${HUB}" = "xtrue" ]; then
+    log "Trigger slave mirrors"
+    "${BASEDIR}/bin/runmirrors" "websync"
+    log "Trigger slave done"
+
+    HOOK=(
+        HOOKNR=4
+        HOOKSCR=${HOOK4}
+    )
+    hook $HOOK
+fi
+
+# All done, rest is done by cleanup hook.
diff --git a/etc/websync.conf b/etc/websync.conf
new file mode 100644
index 0000000..498ac08
--- /dev/null
+++ b/etc/websync.conf
@@ -0,0 +1,121 @@
+########################################################################
+########################################################################
+## This is a sample configuration file for the websync mirror script. ##
+## Most of the values are commented out and just shown here for       ##
+## completeness, together with their default value.                   ##
+########################################################################
+########################################################################
+
+## Mirrorname. This is used for things like the trace file and should always
+## be the full hostname of the mirror.
+#MIRRORNAME=${MIRRORNAME:-`hostname -f`}
+
+## Destination of the mirrored files. Should be an empty directory.
+## CAREFUL, this directory will contain the mirror. Everything else
+## that might have happened to be in there WILL BE GONE after the mirror sync!
+#TO=${TO:-"/org/www.debian.org/www"}
+
+## The upstream name of the rsync share.
+#RSYNC_PATH="web.debian.org"
+
+## The host we mirror from
+#RSYNC_HOST=some.mirror.debian.org
+
+## In case we need a user to access the rsync share at our upstream host
+#RSYNC_USER=
+
+## If we need a user we also need a password
+#RSYNC_PASSWORD=
+
+## In which directory should logfiles end up
+## Note that BASEDIR defaults to $HOME, but can be set before calling the
+## websync script to any value you want (for example using pam_env)
+#LOGDIR="${BASEDIR}/log"
+
+## Name of our own logfile.
+## Note that ${NAME} is set by the websync script
+#LOG="${LOGDIR}/${NAME}.log"
+
+## The script can send logs (or error messages) to a mail address.
+## If this is unset it will default to the local root user unless it is run
+## on a .debian.org machine where it will default to the mirroradm people.
+#MAILTO="root"
+
+## If you do want a mail about every single sync, set this to false
+## Everything else will only send mails if a mirror sync fails
+#ERRORSONLY="true"
+
+## If you want the logs to also include output of rsync, set this to true.
+## Careful, the logs can get pretty big, especially if it is the first mirror
+## run
+#FULLLOGS="false"
+
+## If you do want to exclude files from the mirror run, put --exclude statements here.
+## See rsync(1) for the exact syntax, these are passed to rsync as written here.
+## Please do not use this except for rare cases and after you talked to us.
+## For the sponsor logos see SITE_EXCLUDE
+#EXCLUDE=${EXCLUDE:-"--exclude ${HOSTNAME}"}
+
+## And site specific excludes, by default its the sponsor stuff that should be local to all
+#SITE_EXCLUDE=${SITE_EXCLUDE:-"--exclude core --exclude sponsor_img.jpg --exclude sponsor.html"}
+
+## Do we have leaf mirrors to signal we are done and they should sync?
+## If so set it to true and make sure you configure runmirrors-websync.mirrors
+## and runmirrors-websync.conf for your needs.
+#HUB=false
+
+## We do create three logfiles for every run. 
To save space we rotate them, this
+## defines how many we keep
+#LOGROTATE=14
+
+## Our own lockfile (only one sync should run at any time)
+#LOCK="${TO}/Website-Update-in-Progress-${MIRRORNAME}"
+
+## This file makes sure we end up with a correctly synced mirror even if we get
+## multiple pushes in a short timeframe. websync always sets it itself, shown for reference only.
+#UPDATEREQUIRED="${TO}/Website-Update-Required-${MIRRORNAME}"
+
+## The trace file is used by a mirror check tool to see when we last
+## had a successful mirror sync. Make sure that it always ends up in
+## .project/trace and always shows the full hostname.
+## This is *relative* to ${TO}
+#TRACE=".project/trace/${MIRRORNAME}"
+
+## We sync our mirror using rsync (everything else would be insane), so
+## we need a few options set.
+## The rsync program
+#RSYNC=rsync
+
+## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T!
+## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T!
+## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T!
+## BE VERY CAREFUL WHEN YOU CHANGE THE RSYNC_OPTIONS! BETTER DON'T!
+
+## Default rsync options every rsync invocation sees.
+#RSYNC_OPTIONS="-rltvHSB8192 --timeout 3600 --stats --filter=protect_Website-Update-in-Progress-${MIRRORNAME} --filter=protect_${TRACE} --filter=protect_Website-Update-Required-${MIRRORNAME}"
+
+## Second set of default rsync options, these control the deletion of files
+#RSYNC_OPTIONS2=${RSYNC_OPTIONS2:-"--max-delete=40000 --delay-updates --delete --delete-after --delete-excluded"}
+
+## You may establish the connection via a web proxy by setting the environment
+## variable RSYNC_PROXY to a hostname:port pair pointing to your web proxy. Note
+## that your web proxy's configuration must support proxy connections to port 873.
+# RSYNC_PROXY=
+
+## Hook scripts can be run at various places during the sync.
+## Leave them blank if you don't want any
+## Hook1: After lock is acquired, before the first rsync
+## Hook2: After every rsync run, unless it failed fatally
+## Hook3: After the last rsync run and the trace file update
+## Hook4: After the leaf mirrors got triggered, only if HUB=true
+## Hook5: Not called by the websync script
+##
+## Note that Hook2 can be called more than once. It runs after *every*
+## rsync run, so also when the mirroring needs to re-run thanks to a
+## second push.
+## Hook3 and Hook4 are only called once we are done with mirroring.
+#HOOK1=
+#HOOK2=
+#HOOK3=
+#HOOK4=
+#HOOK5= |