/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tagging.disambiguation;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
import org.languagetool.tagging.disambiguation.AbstractDisambiguator;
import org.languagetool.tools.StringTools;

/**
 * Disambiguator that tags runs of tokens listed in a resource file as multi-word units.
 *
 * <p>Every non-empty line of the resource file that does not start with {@code #} must have
 * the form {@code word1 word2 ...<TAB>postag}: space-separated words, one tab, then the tag.
 * Entries are indexed by their first word. When a sentence token equals such a first word and
 * the following non-whitespace tokens equal the remaining words, every token of the run
 * receives a new reading whose lemma is the whole space-joined entry and whose POS tag is the
 * (optionally wrapped) tag of the entry.
 *
 * <p>The resource file is loaded lazily on the first call to {@link #disambiguate}.
 */
public class MultiWordChunker2
extends AbstractDisambiguator {
    /** Format applied to the POS tag when tag wrapping is enabled. */
    private static final String WRAP_TAG = "<%s>";

    private final String filename;
    private final boolean allowFirstCapitalized;
    private boolean removeOtherReadings = false;
    /** Current tag format, or {@code null} when tags are emitted unchanged. */
    private String tagFormat = WRAP_TAG;
    /**
     * Index from the first word of an entry to all entries starting with that word;
     * {@code null} until loaded. Declared {@code volatile} so that a thread observing a
     * non-null reference also observes the fully populated map.
     */
    private volatile Map<String, List<MultiWordEntry>> tokenToPosTagMap;

    /**
     * Creates a chunker that only matches the first word exactly as written in the file.
     *
     * @param filename resource path of the multi-word list
     */
    public MultiWordChunker2(String filename) {
        this(filename, false);
    }

    /**
     * @param filename resource path of the multi-word list
     * @param allowFirstCapitalized if {@code true}, a capitalized sentence token with no entry
     *        of its own is also looked up with its first character lower-cased
     */
    public MultiWordChunker2(String filename, boolean allowFirstCapitalized) {
        this.filename = filename;
        this.allowFirstCapitalized = allowFirstCapitalized;
    }

    /**
     * @param removeOtherReadings if {@code true}, the multi-word reading replaces the existing
     *        readings of a matched token instead of being added to them
     */
    public void setRemoveOtherReadings(boolean removeOtherReadings) {
        this.removeOtherReadings = removeOtherReadings;
    }

    /**
     * @param wrapTag if {@code true}, tags are emitted as {@code <tag>}; otherwise unchanged
     */
    public void setWrapTag(boolean wrapTag) {
        this.tagFormat = wrapTag ? WRAP_TAG : null;
    }

    /**
     * Builds the POS tag assigned to one token of a matched multi-word entry.
     *
     * @param posTag tag of the entry as read from the resource file
     * @param position zero-based index of the token within the entry (unused here; available
     *        to overriding subclasses)
     * @param multiwordLength number of words in the entry (unused here; available to
     *        overriding subclasses)
     * @return the tag, wrapped according to the current tag format if one is set
     */
    protected String formatPosTag(String posTag, int position, int multiwordLength) {
        return this.tagFormat != null ? String.format(this.tagFormat, posTag) : posTag;
    }

    /**
     * Loads and indexes the resource file on first use; later calls return immediately.
     *
     * @throws RuntimeException if the file cannot be read or a line does not consist of
     *         exactly two tab-separated parts
     */
    private void lazyInit() {
        if (this.tokenToPosTagMap != null) {
            return;
        }
        // Build into a local map and publish it only once complete.
        Map<String, List<MultiWordEntry>> map = new HashMap<>();
        try (InputStream stream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(this.filename)) {
            for (String posToken : this.loadWords(stream)) {
                String[] tokenAndTag = posToken.split("\t");
                if (tokenAndTag.length != 2) {
                    throw new RuntimeException("Invalid format in " + this.filename + ": '" + posToken + "', expected two tab-separated parts");
                }
                String[] tokens = tokenAndTag[0].split(" ");
                String posTag = tokenAndTag[1];
                List<MultiWordEntry> multiwordItems = map.get(tokens[0]);
                if (multiwordItems == null) {
                    multiwordItems = new ArrayList<>();
                    map.put(tokens[0], multiwordItems);
                }
                multiwordItems.add(new MultiWordEntry(Arrays.asList(tokens), posTag));
            }
            this.tokenToPosTagMap = map;
        }
        catch (IOException e) {
            throw new RuntimeException("Could not load multi-word list from " + this.filename, e);
        }
    }

    /**
     * Adds multi-word readings to every run of tokens that matches an entry of the list.
     *
     * <p>The token array obtained from {@code input.getTokens()} is modified in place and then
     * wrapped in the returned sentence. Scanning starts at index 1 (NOTE(review): presumably
     * index 0 is the sentence-start token - confirm).
     *
     * @param input sentence to process
     * @return a new sentence built from the (updated) token array
     */
    @Override
    public AnalyzedSentence disambiguate(AnalyzedSentence input) {
        this.lazyInit();
        Map<String, List<MultiWordEntry>> index = this.tokenToPosTagMap;
        AnalyzedTokenReadings[] tokens = input.getTokens();
        for (int i = 1; i < tokens.length; ++i) {
            String firstToken = tokens[i].getToken();
            List<MultiWordEntry> multiwordItems = index.get(firstToken);
            if (multiwordItems == null && this.allowFirstCapitalized && StringTools.isCapitalizedWord(firstToken)) {
                multiwordItems = index.get(StringTools.lowercaseFirstChar(firstToken));
            }
            if (multiwordItems == null) {
                continue;
            }
            MultiWordEntry multiwordEntry = this.findMultiwordEntry(tokens, i, multiwordItems);
            if (multiwordEntry == null) {
                continue;
            }
            int entrySize = multiwordEntry.tokens.size();
            int multiwordPos = 0;
            // isMatching() counts the start token unconditionally, whereas this loop skips
            // whitespace tokens; the bounds check keeps the loop inside the array should the
            // start token itself be reported as whitespace.
            for (int pos = i; multiwordPos < entrySize && pos < tokens.length; ++pos) {
                AnalyzedTokenReadings currentToken = tokens[pos];
                if (currentToken.isWhitespace()) {
                    continue;
                }
                String multiwordTag = this.formatPosTag(multiwordEntry.tag, multiwordPos, entrySize);
                tokens[pos] = this.prepareNewReading(multiwordEntry.getLemma(), currentToken.getToken(), currentToken, multiwordTag);
                ++multiwordPos;
            }
        }
        return new AnalyzedSentence(tokens);
    }

    /**
     * Returns the first entry of {@code multiwordItems} that matches the sentence at
     * {@code startingPosition}, or {@code null} if none matches.
     */
    private MultiWordEntry findMultiwordEntry(AnalyzedTokenReadings[] inputTokens, int startingPosition, List<MultiWordEntry> multiwordItems) {
        for (MultiWordEntry candidate : multiwordItems) {
            if (this.isMatching(inputTokens, startingPosition, candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Checks whether the words of {@code multiWordEntry} after the first one match the
     * non-whitespace tokens following {@code startingPosition}.
     *
     * <p>The token at {@code startingPosition} is not compared here; the caller has already
     * selected the entry by that token.
     *
     * @return {@code true} if all remaining words were matched before the end of the sentence
     */
    private boolean isMatching(AnalyzedTokenReadings[] inputTokens, int startingPosition, MultiWordEntry multiWordEntry) {
        int expected = multiWordEntry.tokens.size();
        int matched = 1;
        for (int pos = startingPosition + 1; matched < expected && pos < inputTokens.length; ++pos) {
            AnalyzedTokenReadings candidate = inputTokens[pos];
            if (candidate.isWhitespace()) {
                continue;
            }
            if (!this.matches(multiWordEntry.tokens.get(matched), candidate)) {
                return false;
            }
            ++matched;
        }
        return matched == expected;
    }

    /**
     * Decides whether a sentence token corresponds to a word of a multi-word entry.
     * This implementation requires exact equality of the token text.
     *
     * @param matchText word from the multi-word entry
     * @param inputTokens sentence token to compare against
     * @return {@code true} if the token text equals {@code matchText}
     */
    protected boolean matches(String matchText, AnalyzedTokenReadings inputTokens) {
        return matchText.equals(inputTokens.getToken());
    }

    /**
     * Creates the replacement for a matched token, carrying the multi-word reading.
     *
     * @param tokens lemma of the new reading (the space-joined multi-word entry)
     * @param tok surface text of the token
     * @param token existing readings of the token
     * @param tag POS tag of the new reading
     * @return a new token object containing the multi-word reading
     */
    protected AnalyzedTokenReadings prepareNewReading(String tokens, String tok, AnalyzedTokenReadings token, String tag) {
        AnalyzedToken tokenStart = new AnalyzedToken(tok, tag, tokens);
        return this.setAndAnnotate(token, tokenStart);
    }

    /**
     * Builds a copy of {@code oldReading} that contains {@code newReading}, either alongside
     * the old readings or instead of them (see {@link #setRemoveOtherReadings}), and records
     * the change in the historical annotations. Start position, whitespace-before flag and
     * chunk tags are carried over from {@code oldReading}.
     */
    private AnalyzedTokenReadings setAndAnnotate(AnalyzedTokenReadings oldReading, AnalyzedToken newReading) {
        String old = oldReading.toString();
        String prevAnot = oldReading.getHistoricalAnnotations();
        List<AnalyzedToken> initialNewReadings = this.removeOtherReadings ? Arrays.asList(newReading) : oldReading.getReadings();
        AnalyzedTokenReadings newAtr = new AnalyzedTokenReadings(initialNewReadings, oldReading.getStartPos());
        newAtr.setWhitespaceBefore(oldReading.isWhitespaceBefore());
        if (!this.removeOtherReadings) {
            newAtr.addReading(newReading);
        }
        newAtr.setHistoricalAnnotations(this.annotateToken(prevAnot, old, newAtr.toString()));
        newAtr.setChunkTags(oldReading.getChunkTags());
        return newAtr;
    }

    /** Appends a line describing the change made by this chunker to the previous annotations. */
    private String annotateToken(String prevAnot, String oldReading, String newReading) {
        return prevAnot + "\nMULTIWORD_CHUNKER: " + oldReading + " -> " + newReading;
    }

    /**
     * Reads the stream as UTF-8 text and returns its trimmed lines, dropping empty lines and
     * lines starting with {@code #}. The stream is closed when reading finishes.
     *
     * @throws RuntimeException wrapping any {@link IOException} raised while reading
     */
    private List<String> loadWords(InputStream stream) {
        List<String> lines = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty() || line.charAt(0) == '#') {
                    continue;
                }
                lines.add(line);
            }
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
        return lines;
    }

    /** One line of the resource file: the words of a multi-word unit and its POS tag. */
    private static final class MultiWordEntry {
        final List<String> tokens;
        final String tag;
        /** Words joined by single spaces; computed once because it is needed per tagged token. */
        private final String lemma;

        MultiWordEntry(List<String> tokens, String tag) {
            this.tokens = tokens;
            this.tag = tag;
            this.lemma = StringUtils.join(tokens, " ");
        }

        /** Returns the words of this entry joined by single spaces. */
        String getLemma() {
            return this.lemma;
        }

        @Override
        public String toString() {
            return "MultiWordEntry [tokens=" + this.tokens + ", tag=" + this.tag + "]";
        }
    }
}

