#!/usr/bin/python
# -*- coding: utf-8 -*-

# Parse the Apache log file and extract the PATH field

import argparse
import os.path
import re
import sys

from knapsack.utils import parse_file
from knapsack.utils import remove_version_or_discard

# Location of the bot list; each line of the file becomes one set entry.
BOTS = '/usr/share/python-knapsack/bots.txt'


def load_bots(path):
    """Return the set of lines in the file at *path*, stripped of whitespace.

    Leading and trailing whitespace (including the newline) is removed
    from every line. Duplicate lines collapse into a single entry, and
    blank lines contribute the empty string.

    The file is opened inside a ``with`` block so the handle is closed
    deterministically rather than whenever the garbage collector runs.
    """
    with open(path) as bots_file:
        return {line.strip() for line in bots_file}


def main():
    """Parse the command line and run ``parse_file`` on the chosen streams.

    Both positional arguments are optional: ``infile`` defaults to
    standard input and ``outfile`` to standard output. The bot list is
    read from ``BOTS`` and passed as the third argument of
    ``knapsack.utils.parse_file``.
    """
    parser = argparse.ArgumentParser(
        description='Parse software.o.o downloads information and get the PATH field.')
    parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                        default=sys.stdin,
                        help='Logfiles used to read the information')
    parser.add_argument('outfile', nargs='?', type=argparse.FileType('w'),
                        default=sys.stdout)
    args = parser.parse_args()

    parse_file(args.infile, args.outfile, load_bots(BOTS))


if __name__ == '__main__':
    main()
