#!/bin/sh -e
# makewhatis: create the whatis database
# Created: Sun Jun 14 10:49:37 1992
# Revised: Sat Jan  8 14:12:37 1994 by faith@cs.unc.edu
# Revised: Sat Mar 23 17:56:18 1996 by michael@actrix.gen.nz
# ALT Linux adaptations by Dmitry V. Levin <ldv@altlinux.org>,
#                          Alexey Gladkov <legion@altlinux.org>
# Copyright 1992, 1993, 1994 Rickard E. Faith (faith@cs.unc.edu)
# May be freely distributed and modified as long as copyright is retained.
#
# Wed Dec 23 13:27:50 1992: Rik Faith (faith@cs.unc.edu) applied changes
# based on Mitchum DSouza (mitchum.dsouza@mrc-apu.cam.ac.uk) cat patches.
# Also, cleaned up code and made it work with NET-2 doc pages.
#
# makewhatis-1.4: aeb 940802, 941007, 950417
# Fixed so that the -c option works correctly for the cat pages
# on my machine. Fix for -u by Nan Zou (nan@ksu.ksu.edu).
# Many minor changes.
# The -s option is undocumented, and may well disappear again.
#
# Sat Mar 23 1996: Michael Hamilton (michael@actrix.gen.nz).
# I changed the script to invoke gawk only once for each directory tree.
# This speeds things up considerably (from 30 minutes down to 1.5 minutes
# on my 486DX66).
# 960401 - aeb: slight adaptation to work correctly with cat pages.
# 960510 - added fixes by brennan@raven.ca.boeing.com, author of mawk.
# 971012 - replaced "test -z" - it doesn't work on SunOS 4.1.3_U1.
# 980710 - be more careful with TMPFILE.
# 000323 - do not change PATH, better treatment of catpages - Bryan Henderson.
# 011117 - avoid suspicious filenames.
# 030310 - find files only; fix LAPACK cruft; no /usr/man default;
#	use /dev/stderr instead of /dev/tty; handle files with strange names;
#	add support for chinese, hungarian, indonesian, japanese, korean,
#	polish, russian (Thierry Vignaud).
#
# makewhatis 1.6: Federico Lucifredi
# 060608 - Corrected traps.
# 060719 - section choosing behavior to match man's (Mike Frysinger).
#
# Note for Slackware users: "makewhatis -v -w -c" will work.
#
# makewhatis flc 060719 (from man-1.6f)

# Name this script was invoked as (basename of $0); used in diagnostics,
# as the getopt error prefix and in the temporary directory template.
PROG="${0##*/}"
# Use a fixed, minimal search path for every external command run below.
PATH=/bin:/usr/bin
export PATH
# Files created by this script are not writable by group or others.
umask 022

# Source the shell-quote helper library (looked up through PATH).
# NOTE(review): presumably this provides quote_sed_regexp_variable, which
# is called further down but not defined in this file -- confirm.
. shell-quote

# Cleanup hook installed via trap: removes the temporary working
# directory (when one is recorded in TMPFILES_DIR) and terminates the
# script with the status that was current when the handler was entered.
exit_handler()
{
	local saved_status=$?

	# Disarm the EXIT trap so that the "exit" below does not re-enter us.
	trap '' EXIT
	if [ -n "$TMPFILES_DIR" ]; then
		rm -rf -- "$TMPFILES_DIR"
	fi
	exit $saved_status
}

# Verbosity flag: empty means quiet, anything else enables message().
verbose=
# Print all arguments, joined by single spaces, as one line on stderr --
# but only when verbose mode is on.  Silent (and successful) otherwise.
message()
{
	if [ -n "$verbose" ]; then
		printf '%s\n' "$*" >&2
	fi
}

# Pick the directory for temporary files: honour $TMPDIR, default to
# $HOME/tmp, and fall back to /tmp when that directory does not exist.
# The variable is exported explicitly because mktemp -t takes it from its
# environment; a value that is only assigned here would not reach it.
: "${TMPDIR:=$HOME/tmp}"
[ -d "$TMPDIR" ] || TMPDIR=/tmp
export TMPDIR
TMPFILES_DIR="$(mktemp -d -t "$PROG.XXXXXXXXXX")"

# Remove the temporary directory on exit and on the usual fatal signals.
# Signal names are written without the "SIG" prefix: that is the only
# spelling POSIX requires trap to accept (dash rejects "SIGHUP").
trap exit_handler HUP PIPE INT QUIT TERM EXIT

# Source the system-wide lang.sh profile snippet when it exists and is
# non-empty (presumably it sets up the locale -- confirm).
[ ! -s /etc/profile.d/lang.sh ] || . /etc/profile.d/lang.sh
# Colon-separated default search path, as reported by man itself
DEFMANPATH=$(man --path)

# Section directories (manN) to scan, in this order, as a blank-separated list
SECTIONS=$(echo "1:1p:8:2:3:3p:4:5:6:7:9:0p:tcl:n:l:p:o" | tr : ' ')

# this allows to define language specific values for NAME, DESCRIPTION
# if not defined, using those default values
MAN_NAME='^"?([Ии][Мм][Яя]|[Нн][Аа][Зз][Вв][Аа][Нн][Ии][Ее]|[Нн][Аа][Ии][Мм][Ее][Нн][Оо][Вв][Аа][Нн][Ии][Ее]|ИМЕ|NOM|JMÉNO|NAVN|ΟΝΟΜΑ|NOMBRE|NIME|IZENA|NIMI|IME|NÉV|NOME|名前|이름|NAAM|NAZWA|NUME|MENO|НАЗВА|名称|名稱)"?'
MAN_DESCRIPTION='^"?([Оо][Пп][Ии][Сс][Аа][Нн][Ии][Ее]|DESCRIPCIÓ|POPIS|BESKRIVELSE|BESCHREIBUNG|ΠΕΡΙΓΡΑΦΗ|DESCRIPCIÓN|KIRJELDUS|AZALPENA|KUVAUS|OPIS|LEÍRÁS|DESCRIZIONE|説明|설명|BESCHRIJVING|DESCRIÇÃO|DESCRIERE|ОПИС|描述)"?'

# Print the help text and terminate the script.
#   $1 - optional exit status; defaults to 1 (usage error)
# Help that was requested explicitly (status 0) is written to stdout;
# for any other status the text goes to stderr.
usage()
{
	local rc="${1:-1}"

	# We are about to exit, so redirecting the whole shell is harmless.
	[ "$rc" = 0 ] || exec >&2
	cat <<EOF
Build the whatis database for the man pages.

Usage: $PROG [options] [manpath]

Valid options are:
-u             : update database with new pages
-l manlist     : file with man files list for update
-v             : verbose
-h             : show this help and exit
[manpath]      : man directories (default: $DEFMANPATH)
EOF
	exit "$rc"
}

# Parse the command line with getopt(1).  $PROG is quoted so that an
# invocation name containing blanks is still passed as a single word.
# When getopt rejects the arguments, usage prints the help and exits.
TEMP=$(getopt -n "$PROG" -o huvl: -- "$@") || usage
eval set -- "$TEMP"

# Extra find(1) tests used by the directory scan: deffindarg is always
# applied, findarg is only filled in by -u.
deffindarg='-size +0'
findarg=
update=
# "pages" is handed to the gawk program; no option visible here sets it.
pages=
pages_list=
while :; do
	case "$1" in
		-u)
			findarg='-ctime 0'
			update=1
			shift
			;;
		-l)
			shift
			pages_list="$1"
			shift
			;;
		-v)
			verbose=1
			shift
			;;
		-h)
			usage 0
			;;
		--)
			shift
			break
			;;
		*)
			printf '%s\n' "$PROG: unrecognized option: $1" >&2
			exit 1
			;;
	esac
done

# Remaining arguments are the man directories to process; without any,
# fall back to man's own search path with ":" turned into blanks.
if [ -n "$*" ]; then
	manpath="$*"
else
	manpath=$(printf '%s\n' "$DEFMANPATH" | tr : ' ')
fi

# Map a man page tree to the directory that holds its whatis database.
#   $1 - man directory, e.g. /usr/share/man
# On success prints the database directory (no trailing newline) and
# returns 0.  Returns 1 without output when $1 is not a directory, is
# /usr/man, or the resulting database directory does not exist.
mandir_cache()
{
	local d mandir="$1"

	# Two separate tests instead of the obsolescent "-a" operator, whose
	# parsing is ambiguous for unusual operand values.
	[ -d "$mandir" ] && [ "$mandir" != '/usr/man' ] ||
		return 1

	# Well-known trees keep their database below /var/cache/man; any other
	# tree keeps it in the man directory itself.
	d='/var/cache/man'
	case "$mandir" in
		/usr/share/man*)			d="$d${mandir#/usr/share/man}" ;;
		/usr/X11R6/man)				d="$d/X11R6" ;;
		/usr/lib/perl5/man)			d="$d/perl"  ;;
		/usr/local/man|/usr/local/share/man)	d="$d/local" ;;
		*)					d="$mandir"  ;;
	esac
	[ -d "$d" ] ||
		return 1

	printf '%s' "$d"
}

# Prepare the scratch copy of a whatis database inside $TMPFILES_DIR.
#   $1 - database directory (callers pass the result of mandir_cache)
# The scratch directory mirrors $1 with a leading /var/cache/man removed.
# A non-empty $1/whatis is copied there unless a scratch copy already
# exists; otherwise an empty scratch file is created (or just touched).
# Prints the path of the scratch whatis file.
make_tmpfile()
{
	local dbdir="$1"
	shift
	local scratch="$TMPFILES_DIR/${dbdir#/var/cache/man}"
	local copy="$scratch/whatis"

	mkdir -p "$scratch"
	if [ ! -s "$dbdir/whatis" ]; then
		touch "$copy"
	elif [ ! -e "$copy" ]; then
		cp -a "$dbdir/whatis" "$copy"
	fi
	echo "$copy"
}

# Install a scratch whatis file as the real database.
#   $1 - scratch file
#   $2 - destination database file
# Nothing happens when the scratch file is missing or empty.  Otherwise
# the destination is rewritten with the sorted (C collation), de-duplicated
# scratch contents, but only if it is missing/empty or older than $1.
commit_db()
{
	local scratch="$1"
	shift
	local target="$1"
	shift

	if [ ! -s "$scratch" ]; then
		return 0
	fi
	if [ ! -s "$target" ] || [ "$scratch" -nt "$target" ]; then
		LC_COLLATE=C sort -u -o "$target" "$scratch"
	fi
}

# Read the first line of a man page from stdin and print the charset named
# in a coding cookie of the exact form
#   .\" -*- mode: troff; coding: CHARSET -*-
# (no trailing newline is printed).  Prints nothing when the first line is
# not such a cookie or when the input is empty.  Returns 0 in all of
# these cases.
get_charset()
{
	local l prefix='.\" -*- mode: troff; coding: '

	# Empty input is not an error; a last line without a newline is
	# still examined.
	read -r l || [ -n "$l" ] || return 0
	case "$l" in
		"$prefix"*'-*-') ;;
		*) return 0 ;;
	esac

	# Take the first blank-delimited word after "coding: ".  Parameter
	# expansion is used instead of an unquoted "set $l": the latter would
	# glob-expand the "-*-" markers against the current directory and
	# shift the word positions.
	l=${l#"$prefix"}
	l=${l#"${l%%[![:space:]]*}"}
	printf '%s' "${l%%[[:space:]]*}"
}

# Print the contents of the ".charset" file found in directory $1.
# Prints nothing (and succeeds) when no such regular file exists.
get_def_charset()
{
	if [ -f "$1/.charset" ]; then
		cat "$1/.charset"
	fi
}

# Derive the page name from a man page path.
#   $1 - path of the man page file
#   $2 - section of the page
# Strips the directory part, then the last ".extension", then a remaining
# ".SECTION" suffix, and prints the result followed by a newline.
get_manname()
{
	local manfile="$1"
	local section="$2"
	# Kept local so that the caller's own "manname" is never clobbered.
	local manname

	manname="${manfile##*/}"
	manname="${manname%.*}"
	# The section is quoted so that it is matched literally, and printf is
	# used because echo would swallow names such as "-n" or "-e".
	printf '%s\n' "${manname%."$section"}"
}

# Extract the whatis entries of a single man page and print them on stdout.
#   $1 - path of the man page file
#   $2 - section of the page (shown in the "(section)" column)
#   $3 - optional charset to assume when the page does not declare one
# Globals read:    ICONV, pages, MAN_NAME, MAN_DESCRIPTION
# Globals written: progname, charset, iconv_cmd (they are not local)
# The page text comes from the external man-source command, which is not
# defined in this file (presumably it emits the uncompressed page source
# -- confirm).  The gawk program gathers the lines that follow the NAME
# heading (or a heading matching MAN_NAME) up to the next heading, cleans
# out troff markup, and for every comma-separated name in front of " - "
# prints one line: the name padded to 20 columns, "(section)" and the
# description.  Names that differ from the file's own name get
# " [progname]" appended.
process_manfile()
{
	local manfile="$1"
	local actual_section="$2"
	local def_charset="${3:-}"


	# Page name derived from the file name; used to tag alias names
	progname="$(get_manname "$manfile" "$actual_section")"

	# Charset declared by the page itself, else the caller's default
	charset="$(man-source "$manfile" |get_charset)"
	[ -n "$charset" ] ||
		charset="$def_charset"

	# Convert to UTF-8 only when a charset is known and ICONV is set;
	# otherwise pass the text through unchanged.
	iconv_cmd=cat
	[ -z "$charset" -o -z "$ICONV" ] ||
		iconv_cmd="$ICONV -f $charset -t utf-8"

	# Sanity check: if the conversion command cannot pass a plain ASCII
	# word through unchanged, fall back to cat.
	[ "`echo TEST | $iconv_cmd`" = "TEST" ] ||
		iconv_cmd=cat

	# $iconv_cmd is expanded unquoted on purpose: it holds a command plus
	# its arguments.  The var=value words after the gawk program are awk
	# variable assignments.
	man-source "$manfile" |
	$iconv_cmd |
	gawk '
	    BEGIN {
	      insh = 0; thisjoin = 1; done = 0;
	      entire_line = "";

	      while (!done && getline > 0) {
		gsub(/.\b/, "");
		if (($1 ~ /^\.[Ss][Hh]/ &&
		  ($2 ~ /[Nn][Aa][Mm][Ee]/ ||
		   $2 ~ man_name )) ||
		  (pages == "cat" && $1 ~ /^NAME/)) {
		    if (!insh) {
		      insh = 1;
		    } else {
		      done = 1;
		    }
		} else if (insh) {
		  if ($1 ~ /^\.[Ss][HhYS]/ ||
		    (pages == "cat" &&
		    ($1 ~ /^S[yYeE]/ || $1 ~ /^DESCRIPTION/ ||
		     $1 ~ man_description ||
		     $1 ~ /^COMMAND/ || $1 ~ /^OVERVIEW/ ||
		     $1 ~ /^STRUCTURES/ || $1 ~ /^INTRODUCTION/ ||
		     $0 ~ /^[^ ]/))) {
		      # end insh for Synopsis, Syntax, but also for
		      # DESCRIPTION (e.g., XFree86.1x),
		      # COMMAND (e.g., xspread.1)
		      # OVERVIEW (e.g., TclCommandWriting.3)
		      # STRUCTURES (e.g., XEvent.3x)
		      # INTRODUCTION (e.g., TclX.n)
		      # and anything at all that begins in Column 1, so
		      # is probably a section header.
		    done = 1;
		  } else {
		    if ($0 ~ progname"-") {  # Fix old cat pages
			sub(progname"-", progname" - ");
		    }
		    if ($0 ~ /[^ \\]-$/) {
		      sub(/-$/, "");	  # Handle Hyphenations
		      nextjoin = 1;
		    } else if ($0 ~ /\\c$/) {
		      sub(/\\c$/, "");	  # Handle Continuations
		      nextjoin = 1;
		    } else
		      nextjoin = 0;

		    sub(/^.[IB] /, "");       # Kill bold and italics
		    sub(/^.BI /, "");         #
		    sub(/^.SM /, "");         # Kill small
		    sub(/^.Nm /, "");         # Kill bold
		    sub(/^.Tn /, "");         # Kill normal
	            sub(/^.Li /, "");         # Kill .Li
	            sub(/^.Dq /, "");         # Kill .Dq
	            sub(/^.Nd */, "- ");      # Convert .Nd to dash
		    sub(/\\\".*/, "");        # Trim pending comments
		    sub(/  *$/, "");          # Trim pending spaces
		    sub(/^\.$/, "");          # Kill blank comments
		    sub(/^'"'"'.*/, "");      # Kill comment/troff lines
		    sub(/^.in .*/, "");       # Kill various macros
		    sub(/^.ad .*/, "");
		    sub(/^.ti .*/, "");
		    sub(/^.ta .*/, "");
		    sub(/^.Vb .*/, "");
		    sub(/^.[PLTH]P$/, "");    # .PP/.LP/.TP/.HP
		    sub(/^.Pp$/, "");
		    sub(/^.[iI]X .*$/, "");
		    sub(/^.nolinks$/, "");
		    sub(/^.B$/, "");
		    sub(/^.nf$/, "");

		    if (($1 ~ /^\.../ || $1 == "") &&
		        (entire_line ~ / - / || entire_line ~ / \\- /)) {
		      # Assume that this ends the description of one line
		      # Sometimes there are several descriptions in one page,
		      # as in outb(2).
		      handle_entire_line();
		      entire_line = "";
		      thisjoin = 1;
		    } else {
		      if (thisjoin) {
			entire_line = entire_line $0;
		      } else {
			entire_line = entire_line " " $0;
		      }
		      thisjoin = nextjoin;
		    }
		  }
		}
	      }
	      handle_entire_line();
	    }

	    function handle_entire_line() {
	      x = entire_line;             # Keep it short

	      gsub(/\015/, "", x);         # Kill DOS remains
	      gsub(/	/, " ", x);        # Translate tabs to spaces
	      gsub(/  +/, " ", x);         # Collapse spaces
	      gsub(/ *, */, ", ", x);      # Fix comma spacings
	      sub(/^ /, "", x);            # Kill initial spaces
	      sub(/ $/, "", x);            # Kill trailing spaces
	      sub(/__+/, "_", x);          # Collapse underscores

	      gsub(/\\f\(../, "", x);         # Kill font changes
	      gsub(/\\f[PRIB0123]/, "", x);   # Kill font changes
	      gsub(/\\s[-+0-9]*/, "", x);     # Kill size changes
	      gsub(/\\&/, "", x);             # Kill \&
	      gsub(/\\\|/, "", x);            # Kill \|
	      gsub(/\\\((ru|ul)/, "_", x);    # Translate
	      gsub(/\\\((mi|hy|em)/, "-", x); # Translate
	      gsub(/\\\*\(../, "", x);        # Kill troff strings
	      gsub(/\\/, "", x);              # Kill all backslashes
	      gsub(/"/, "", x);               # Kill quotes (from .Nd "foo bar")
	      sub(/<h1 align=center>/, "", x);# Yuk! HTML cruft
	      gsub(/\000.*/, "X", x);         # Binary cruft in LAPACK pages
	      gsub(/  +/, " ", x);            # Collapse spaces (again)
	      sub(/^ /, "", x);               # Kill initial spaces (again)
	      sub(/ $/, "", x);               # Kill trailing spaces (again)
	      sub(/\.$/, "", x);              # Kill trailing period

	      if (!match(x, / - /))
		return;

	      after_dash = substr(x, RSTART);
	      head = substr(x, 1, RSTART-1) ", ";
	      while (match(head, /, /)) {
		prog = substr(head, 1, RSTART-1);
		head = substr(head, RSTART+2);
		if (prog != progname)
		  prog = prog " [" progname "]";
		printf "%-*s (%s) %s\n", 20, prog, actual_section, after_dash;
	      }
	    }
	    ' pages=$pages \
	      man_name="$MAN_NAME" man_description="$MAN_DESCRIPTION" \
	      progname="$progname" actual_section="$actual_section"
}

# first truncate all the whatis files that will be created new,
# then only update - we might visit the same directory twice
if [ -z "$update" ] && [ -z "$pages_list" ]; then
	# $manpath is a blank-separated list and is split on purpose
	for mandir in $manpath; do
		# Trees without a usable database directory are skipped.
		# The target is quoted so that a directory name containing
		# blanks is still treated as a single file name.
		if destdir="$(mandir_cache "$mandir")"; then
			: >"$destdir/whatis"
		fi
	done
fi

# Charset conversion is only attempted when iconv is installed here
ICONV=/usr/bin/iconv
[ -x "$ICONV" ] || ICONV=

if [ -n "$pages_list" ]; then
	# List mode (-l): refresh the entries of the man pages named in
	# $pages_list, one path per line; "-" reads the list from stdin.
	[ "$pages_list" = "-" ] || [ -s "$pages_list" ] || usage 1

	# cat is what maps "-" to standard input.  IFS= and -r keep leading
	# blanks and backslashes in file names intact.
	cat "$pages_list" | while IFS= read -r manfile; do
		curdir="${manfile%/*}"
		section="${curdir##*/man}"
		destdir="$(mandir_cache "${curdir%/man*}")" ||
			continue
		TMPFILE="$(make_tmpfile "$destdir")"

		if [ -s "$TMPFILE" ]; then
			manname="$(get_manname "$manfile" "$section")"
			quote_sed_regexp_variable manname "$manname"
			# Save list of mans and delete them later; the names of
			# one section are joined with "\|" into a single regexp
			# alternation.
			if [ -s "$TMPFILE.del-$section" ]; then
				printf '\|%s' "$manname" >>"$TMPFILE.del-$section"
			else
				printf '%s' "$manname" >"$TMPFILE.del-$section"
			fi
		fi

		# Only pages that still exist and are not symlinks are indexed
		# again; for the others just the old entries get deleted.
		if [ -e "$manfile" ] && [ ! -L "$manfile" ]; then
			def_charset="$(get_def_charset "${manfile%/man$section/*}")"
			process_manfile "$manfile" "$section" "$def_charset" >>"$TMPFILE.add"
		fi
	done

	# Apply the collected changes to every scratch database.
	find "$TMPFILES_DIR" -type f -name whatis -print |
	while IFS= read -r i; do
		# Build a sed script with two delete commands per section: one
		# for alias lines ("other [name] (section)") and one for lines
		# that start with the name itself.
		find "${i%whatis}" -maxdepth 1 -mindepth 1 -type f -name 'whatis.del-*' |
		while IFS= read -r dellist; do
			section="${dellist##*/whatis.del-}"
			printf '%s' '/^.\+ \[\('
			cat "$dellist"
			printf '%s\n' "\)\][[:blank:]]\+($section)/d"
			printf '%s' '/^\('
			cat "$dellist"
			printf '%s\n' "\)[[:blank:]]\+($section)/d"
		done > "$i.del"
		[ ! -s "$i.del" ] ||
			sed -i -f "$i.del" "$i"
		[ ! -s "$i.add" ] ||
			cat "$i.add" >>"$i"

		commit_db "$i" "/var/cache/man/${i#"$TMPFILES_DIR"}"
	done
else
	# Scan mode: walk the section directories of every man tree.
	# $manpath is a blank-separated list and is split on purpose.
	for mandir in $manpath; do
		destdir="$(mandir_cache "$mandir")" ||
			continue
		TMPFILE="$(make_tmpfile "$destdir")"

		message "about to enter $mandir"

		if [ -z "$update" ] && [ -s "$destdir/whatis" ]; then
			message "skipping $mandir - we did it already"
		else
			here=$PWD
			cd "$mandir"

			def_charset="$(get_def_charset "$mandir")"

			for i in $SECTIONS; do
				if [ -d "man$i" ]; then
					cd "man$i"
					export section=$i
					export curdir="$mandir/man$i"
					# $findarg and $deffindarg hold several
					# find arguments each and must be split.
					find . -type f -name '*.*' $findarg $deffindarg -print |
					while IFS= read -r manfile; do
						process_manfile "$manfile" "$i" "$def_charset"
					done
					cd ..
				fi
			done >> "$TMPFILE"

			cd "$here"

			commit_db "$TMPFILE" "$destdir/whatis"
		fi
	done
fi
