#!/usr/bin/python3

import argparse
import manatee
from gdex import GDEX, Sentence, DEFAULT_COEF

# Fraction of the top score that is printed for corpus positions which are
# not covered by any 's' structure.
POSITION_NOT_IN_SENTENCE_SCORE = 0

# Command-line interface: one positional corpus name plus optional GDEX
# configuration path, score scaling factor and sentence-only switch.
parser = argparse.ArgumentParser()
parser.add_argument(
    "corpus",
    help="corpus name",
)
parser.add_argument(
    "-c", "--configuration",
    help="path to GDEX configuration file",
)
parser.add_argument(
    "-t", "--top-score",
    type=int,
    default=DEFAULT_COEF,
    help="top score (default %d)" % DEFAULT_COEF,
)
parser.add_argument(
    "-s", "--sentence-only",
    action="store_true",
    help="disregard keyword position",
)
args = parser.parse_args()

# Open the corpus, hand its ENCODING setting to manatee and fetch the 's'
# structure whose ranges delimit the sentences to be scored.
C = manatee.Corpus(args.corpus)
manatee.setEncoding(C.get_conf('ENCODING'))
struct = C.get_struct('s')

# Every printed score is zero-padded to the number of characters in the
# requested top score, so all output lines have the same minimum width.
score_format = '%0' + str(len(str(args.top_score))) + 'd'


def format_score(relative_score):
    """Return *relative_score* scaled by the top score as a padded string.

    The product is truncated to an integer with ``int()`` before it is
    zero-padded with ``score_format``.
    """
    return score_format % int(args.top_score * relative_score)


# The line printed for positions outside any sentence never changes, so it is
# formatted once instead of once per position.
not_in_sentence_line = format_score(POSITION_NOT_IN_SENTENCE_SCORE)

# Pass conf_path only when a configuration file was requested; otherwise GDEX
# is constructed without that argument at all.
gdex_options = {'precalculated': False}
if args.configuration:
    gdex_options['conf_path'] = args.configuration
gdex = GDEX(C, **gdex_options)

# Walk the 's' structures in order and print one line per corpus position.
lastpos = 0
for s_id in range(struct.size()):
    s_beg = struct.beg(s_id)
    s_end = struct.end(s_id)

    # Positions between the previous sentence and this one.
    for _ in range(lastpos, s_beg):
        print(not_in_sentence_line)

    # Structures with an empty range produce no output.
    if s_end > s_beg:
        # NOTE(review): the third and fourth arguments are presumably the
        # keyword position and keyword length -- confirm against
        # gdex.Sentence.
        gdex_sentence = Sentence(
            s_beg, s_end, s_beg, 1,
            attribs=gdex.attribs,
            invoked_names=gdex.invoked_names,
        )
        if args.sentence_only:
            # One score for the whole sentence, repeated for each position.
            sentence_line = format_score(gdex.get_score(gdex_sentence))
            for _ in range(s_beg, s_end):
                print(sentence_line)
        else:
            # Score the initial state, then re-score after every successful
            # next_kw() call until it returns a false value.
            print(format_score(gdex.get_score(gdex_sentence)))
            while gdex_sentence.next_kw():
                print(format_score(gdex.get_score(gdex_sentence)))
    lastpos = s_end

# Positions after the last sentence up to the end of the corpus.
for _ in range(lastpos, C.size()):
    print(not_in_sentence_line)
 
