#!/usr/bin/env bash
# Copyright (C) 2013-2014 Luke Shumaker <lukeshu@sbcglobal.net>
#
# License: GNU GPLv2+
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Message domain and catalog directory, exported so that gettext (and any
# child process) picks them up from the environment.
export \
	TEXTDOMAIN='librelib' \
	TEXTDOMAINDIR='/usr/share/locale'

# Keyword options handed to xgettext by default.  Each element is a
# complete `--keyword=SPEC` argument, SPEC being in xgettext's format.

# "Simple" keywords: the extracted text is used as-is.
default_simple=(
	# gettext-style functions, with and without the eval_ prefix
	--keyword=eval_gettext
	--keyword='eval_ngettext:1,2'
	--keyword=gettext
	--keyword='ngettext:1,2'
	# general printing helpers
	--keyword=_
	--keyword=print
	--keyword=term_title
	# status/diagnostic helpers
	--keyword=msg
	--keyword=msg2
	--keyword=warning
	--keyword=error
	--keyword=stat_busy
	--keyword=die
	# locking helpers: the message is the 3rd argument
	--keyword=lock:3
	--keyword=slock:3
)

# "Prose" keywords: the extracted text is additionally run through
# whitespace-collapse by main().
default_prose=(
	--keyword=prose
	--keyword=bullet
)

readonly default_simple default_prose

# When no `gettext` command/function is available, define a stand-in
# that simply echoes its arguments back (i.e. no translation happens).
type gettext &>/dev/null || gettext() { echo "$@"; }

# Usage: errusage [FMT [ARGS...]]
#
# Report a command-line usage error on stderr.
#
# If FMT is given, it is passed through gettext and then used as a
# printf format for ARGS; the resulting message is prefixed with the
# program's base name.  The usage text is printed afterwards in every
# case.  Nothing is written to stdout, so the diagnostics cannot end up
# in the generated .pot output.
#
# Returns: the exit status of `usage`.
errusage() {
	# Keep the translated format out of the global namespace.
	local fmt
	if [[ $# -gt 0 ]]; then
		fmt="$(gettext "$1")"; shift
		# The program name is passed as data rather than spliced into
		# the format, so a '%' or '\' in $0 cannot corrupt the output.
		printf "%s: $fmt\n" "${0##*/}" "$@" >&2
	fi
	usage >&2
}

# Usage: usage
#
# Print the help text on stdout.  The print/prose/flag helpers used
# here are expected to come from libremessages, which is sourced first.
usage() {
	. libremessages

	# Default keyword lists, shown without their '--keyword=' prefix.
	local simple_keywords="${default_simple[*]#--keyword=}"
	local prose_keywords="${default_prose[*]#--keyword=}"
	local program_name="${0##*/}"

	print 'Usage: %s [OPTIONS] FILES...' "$program_name"
	print 'Generates .pot files for programs using libremessages'
	printf '\n'

	prose 'The keyword format is the same as in GNU xgettext.'
	printf '\n'

	prose 'The default simple keywords are: %s' "$simple_keywords"
	printf '\n'

	prose 'The default prose keywords are: %s' "$prose_keywords"
	printf '\n'

	print 'Options:'
	flag '--simple=KEYWORD' 'Look for KEYWORD as an additional simple keyword'
	flag '--prose=KEYWORD' 'Look for KEYWORD as an additional prose keyword'
	flag '-k' 'Disable using the default keywords'
	flag '-h, --help' 'Show this text'
}

# Usage: xgettext-sh [XGETTEXT_ARGS...]
#
# Run xgettext with the options every extraction pass shares, followed
# by the caller's arguments (keyword options, `--`, input files).
xgettext-sh() {
	local -a common_opts=(
		--omit-header
		--from-code=UTF-8
		-L shell
		-k    # no argument: start from an empty keyword list
		-o -  # write the result to stdout
	)
	xgettext "${common_opts[@]}" "$@"
}

# Usage: xgettext-flag [XGETTEXT_ARGS...]
#
# Extract translatable strings from calls to `flag`.
#
# The function works as a three-stage pipeline:
#  1. For argument positions 1, 2, 3, ... run xgettext-sh with the
#     keyword `flag:N,"N"`, so each entry extracted for position N is
#     tagged with a `#. N` comment.  The scan stops once three
#     positions in total have produced no output.
#  2. Normalize whitespace and drop the `#, sh-format` lines.
#  3. Regroup the entries by position, then walk the positions in order
#     deciding which ones to emit: a position whose (first) msgid ends
#     in ':' is emitted and the next position is examined the same way;
#     any other position is skipped and the position right after it is
#     emitted instead.
#     NOTE(review): this presumably mirrors flag's calling convention
#     (untranslated FLAG followed by translated DESCRIPTION, or a lone
#     translated "HEADING:") -- confirm against libremessages.
xgettext-flag() {
	{
		local -i misses=0
		local -i argpos
		local extracted
		for (( argpos = 1; misses < 3; argpos++ )); do
			extracted="$(xgettext-sh --keyword="flag:$argpos,\"$argpos\"" "$@")"
			if [[ -z $extracted ]]; then
				misses+=1
				continue
			fi
			printf -- '%s\n' "$extracted"
		done
	} | whitespace-collapse | sed '/^\#, sh-format/d' | {
		local -i argpos
		local -i highest=0
		local -a by_position=()
		local header_re='^#\. ([0-9]+)$'
		local entry_line

		# Empty IFS: `read` keeps each line's leading/trailing blanks.
		IFS=''
		while read -r entry_line; do
			if [[ $entry_line =~ $header_re ]]; then
				# A `#. N` tag selects the bucket for the following lines.
				argpos=${BASH_REMATCH[1]}
				continue
			fi
			by_position[$argpos]+="$entry_line"$'\n'
		done

		# Array indices are listed in ascending order, so the last one
		# visited is the highest position that received any text.
		local idx
		for idx in "${!by_position[@]}"; do
			highest=$idx
		done

		# From here on, unquoted expansions split on newlines only.
		IFS=$'\n'
		local -i want_flag=1
		local -a entry
		for (( argpos = 1; argpos <= highest; argpos++ )); do
			if (( ! want_flag )); then
				# The position following a skipped one: emit it (if any).
				if [[ -n ${by_position[$argpos]} ]]; then
					printf -- '%s\n' "${by_position[$argpos]}"
				fi
				want_flag=1
				continue
			fi

			# Deliberately unquoted: split the bucket into its lines.
			entry=(${by_position[$argpos]})
			if [[ ${entry[1]} == *':"' ]]; then
				# Second line (the msgid) ends in ':' -- emit it.
				printf -- '%s\n' "${by_position[$argpos]}"
			else
				want_flag=0
			fi
		done
	}
}

# Usage: ... | whitespace-collapse
#
# Filter xgettext output (stdin -> stdout) so that message strings read
# as flowing prose.  Relies on GNU sed (`\s`, `\t`, `\r`, `-r`).
whitespace-collapse() {
	# Undo xgettext's word-wrapping: a closing quote, a line break and
	# the next opening quote are removed, joining the string pieces.
	# (Line breaks are carriage returns at that point, see below.)
	local unwrap='s/"\r\s*"//g'

	# Escaped newlines/tabs and real tabs become plain spaces.
	local despace='s/(\\n|\\t|\t)/ /g'
	# Runs of spaces shrink to one space, unless they follow . ! or ?
	local squeeze='s/(^|[^.!? ]) +/\1 /g'
	# After . ! or ?, two or more spaces shrink to exactly two.
	local sentence='s/([.!?])  +/\1  /g'

	# Newlines are swapped for carriage returns around the first sed so
	# that it sees the whole input as a single line.
	tr '\n' '\r' \
		| sed "$unwrap" \
		| tr '\r' '\n' \
		| sed -r -e "$despace" -e "$squeeze" -e "$sentence"
}

# Usage: main [OPTIONS] [--] FILES...
#
# Parse the command line, then write the extracted messages to stdout.
#
# Options:
#   --simple KEYWORD, --simple=KEYWORD  additional simple keyword
#   --prose KEYWORD,  --prose=KEYWORD   additional prose keyword
#   -k                                  do not add the default keywords
#   -h, --help                          print the usage text, return 0
#   --                                  everything after it is a file
#
# Returns: 0 after --help; 1 if the command line is invalid (unknown
# option, option missing its argument, or no input file); otherwise the
# exit status of the final pipeline (i.e. of msguniq).
main() {
	local simple=()
	local prose=()
	local files=()
	local use_defaults=true
	local error=false
	local opt

	# The positional parameters are walked by index ("${!i}") so that
	# an option can consume the following argument by bumping i.
	local -i i
	for (( i=1; i <= $#; i++ )); do
		case "${!i}" in
			--simple|--prose)
				opt="${!i}"
				i+=1
				if (( i > $# )); then
					# Without this check a trailing option would silently
					# contribute an empty `--keyword=`.
					errusage "option requires an argument: %s" "$opt"
					error=true
				elif [[ $opt == --simple ]]; then
					simple+=(--keyword="${!i}")
				else
					prose+=(--keyword="${!i}")
				fi
				;;
			--simple=*) simple+=(--keyword="${!i#*=}");;
			--prose=*)  prose+=(--keyword="${!i#*=}");;
			-k) use_defaults=false;;
			--help|-h) usage; return 0;;
			--) i+=1; break;;
			-*) errusage "unrecognized option: %s" "${!i}"; error=true;;
			*) files+=("${!i}");;
		esac
	done
	# Whatever is left (only non-empty after `--`) is taken as file names.
	files+=("${@:$i}")
	if [[ ${#files[@]} -lt 1 ]]; then
		errusage "no input file given"
		error=true
	fi
	if "$error"; then
		return 1
	fi
	if "$use_defaults"; then
		simple+=("${default_simple[@]}")
		prose+=("${default_prose[@]}")
	fi

	# Main code: three extraction passes (simple keywords, prose
	# keywords with whitespace collapsing, `flag` calls) are
	# concatenated, stripped of `#, sh-format` lines and handed to
	# msguniq.
	{
		xgettext-sh "${simple[@]}" -- "${files[@]}"
		xgettext-sh "${prose[@]}" -- "${files[@]}" | whitespace-collapse
		xgettext-flag -- "${files[@]}"
	} | sed '/^\#, sh-format/d' | msguniq -Fi --to-code=UTF-8
}

# Script entry point: pass the command-line arguments through to main().
main "$@"
