#!/bin/bash

# Load the configuration file: prefer kp.cfg in the current directory and
# fall back to the system-wide /etc/kp.cfg.
# NOTE(review): the rest of the script uses PAYLOAD, TMP and get_path without
# defining them; presumably they come from this file -- confirm.
if [ -f ./kp.cfg ]; then
    kp_cfg_file=./kp.cfg
else
    kp_cfg_file=/etc/kp.cfg
fi

# Stop early instead of running the rest of the script without its settings.
if [ ! -r "$kp_cfg_file" ]; then
    echo "$0: cannot read configuration file $kp_cfg_file" >&2
    exit 1
fi

# The path always contains a slash, so '.' loads exactly the file tested
# above instead of searching $PATH for a file named kp.cfg first.
. "$kp_cfg_file"

# Maximum number of days processed in parallel; also passed to
# sort --parallel for the final aggregation.
NPROCS='6'
# Base location of the published access logs; per-day log URLs are built
# as $URL/YYYY/MM/<file>.
URL='http://langley.suse.de/pub/pontifex3-opensuse.suse.de/download.opensuse.org/'

# Download the access log of a single day and store its processed lines.
#
# Arguments: $1 - year (YYYY), $2 - month (MM), $3 - day (DD)
# Globals:   URL     (read) base URL of the log archive
#            PAYLOAD (read) output root; the result is written to
#                           $PAYLOAD/YYYY/MM/YYYYMMDD.txt
#            TMP     (read) scratch directory handed to sort -T
# Requires:  get_path, a stdin-to-stdout filter that is not defined in this
#            function (presumably provided by the configuration file).
# Returns:   0 when the day was stored or when no log is published for it,
#            1 when the download or the decompression failed.
function process_day {
    local year=$1 month=$2 day=$3
    local remote=$URL/$year/$month/download.opensuse.org-$year$month$day-access_log
    local out_dir=$PAYLOAD/$year/$month
    local out=$out_dir/$year$month$day.txt
    local partial=$out.part
    local src
    local -a decompress stage_status

    mkdir -p "$out_dir"
    # The file must exist even when nothing can be downloaded; it stays
    # empty in that case.
    touch "$out"

    # Probe with a HEAD request: the .xz log is preferred, .gz is the fallback.
    if curl -sIf -o /dev/null "$remote.xz"; then
        src=$remote.xz
        decompress=(xz -d)
    elif curl -sIf -o /dev/null "$remote.gz"; then
        src=$remote.gz
        decompress=(gunzip)
    else
        return 0
    fi

    # Drop duplicate lines, then strip the first space-separated field.
    # The result goes to a side file so that an interrupted transfer never
    # leaves a non-empty, truncated result behind.
    curl -sf "$src" | "${decompress[@]}" | get_path | sort -T "$TMP" | uniq \
        | cut -d ' ' -f 2- >"$partial"
    stage_status=("${PIPESTATUS[@]}")

    # Only the download and the decompression stages are checked: those are
    # the ones that fail when the transfer breaks or the archive is corrupt.
    if [ "${stage_status[0]}" -ne 0 ] || [ "${stage_status[1]}" -ne 0 ]; then
        echo "process_day: failed to fetch $src" >&2
        rm -f "$partial"
        return 1
    fi

    mv "$partial" "$out"
}


# Number of days to look back, taken from the first argument; without an
# argument the last 90 days are processed.
NDAYS=${1:-90}

# Accept only a plain non-negative integer. Anything else would leave the
# day loop below without any iteration and the final aggregation without
# input files.
case $NDAYS in
    *[!0-9]*)
        echo "Usage: $0 [NDAYS]" >&2
        exit 1
        ;;
esac

# Per-day files that feed the final aggregation
LOGS=()

# Number of background jobs launched so far; used to pause after every
# NPROCS jobs
count=0

# Walk from NDAYS days ago up to today (0 days ago). Every day contributes
# its file to LOGS; days whose file is missing or empty are (re)processed in
# the background, in batches of NPROCS jobs.
for days_ago in $(seq "$NDAYS" -1 0); do
    # Get the date as YYYY-MM-DD and split it into its components
    pdate=$(date -d "$days_ago day ago" +%Y-%m-%d)
    IFS=- read -r year month day <<<"$pdate"

    logfile=$PAYLOAD/$year/$month/$year$month$day.txt
    LOGS+=("$logfile")

    # A non-empty file means the day was already processed: keep it and
    # don't call process_day()
    if [ -s "$logfile" ]; then
        continue
    fi

    count=$((count + 1))
    ( process_day "$year" "$month" "$day" ) &

    # After every NPROCS launched jobs, wait for all of them before
    # starting more
    if [ $((count % NPROCS)) -eq 0 ]; then
        wait
    fi
done

# Wait for the jobs of the last, possibly incomplete, batch
wait

# Merge all per-day files, count how often each line occurs and rank the
# lines by that count (highest first). The ranking is built in a side file
# and only replaces payload.txt when every stage succeeded.
if [ "${#LOGS[@]}" -eq 0 ]; then
    # Without file arguments sort would read from standard input
    echo "$0: no log files to aggregate" >&2
    exit 1
fi

new_payload=$PAYLOAD/_tmp_payload.txt
sort --parallel="$NPROCS" -T "$TMP" "${LOGS[@]}" | uniq -c \
    | sort --parallel="$NPROCS" -T "$TMP" -nr > "$new_payload"
aggregate_status=("${PIPESTATUS[@]}")

for stage_rc in "${aggregate_status[@]}"; do
    if [ "$stage_rc" -ne 0 ]; then
        echo "$0: aggregation failed, keeping the current payload.txt" >&2
        rm -f "$new_payload"
        exit 1
    fi
done

# Make a backup of the old payload.txt, if there is one (there is none on
# the very first run). The date is computed once so that both commands use
# the same file name even when the run crosses midnight.
backup=$PAYLOAD/payload-$(date +%Y-%m-%d).txt
if [ -f "$PAYLOAD/payload.txt" ]; then
    mv "$PAYLOAD/payload.txt" "$backup"
    gzip "$backup"
fi
mv "$new_payload" "$PAYLOAD/payload.txt"
