/*
 * Decompiled with CFR 0.152.
 */
package org.apache.uima.ruta.engine;

import java.util.Locale;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.ruta.engine.HtmlConverterPSpan;
import org.apache.uima.ruta.engine.HtmlConverterPSpanReplacement;
import org.htmlparser.Tag;
import org.htmlparser.Text;
import org.htmlparser.tags.ScriptTag;
import org.htmlparser.visitors.TextExtractingVisitor;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Parser visitor that collects two kinds of spans while walking an HTML document:
 * <ul>
 *   <li>text spans: one per text node found inside {@code <body>} and outside
 *       {@code <script>}, skipping blank text nodes;</li>
 *   <li>line-break spans: a one-character {@code "\n"} replacement at the start offset of
 *       every start tag whose name is contained in the configured tag set.</li>
 * </ul>
 * Tag names are compared in lower case, so the configured set is expected to contain
 * lower-case names.
 */
public class HtmlConverterVisitor
extends TextExtractingVisitor {
    private static final String BODY_TAG = "body";
    private static final String SCRIPT_TAG = "script";

    /** True between a {@code body} start tag and its end tag. */
    private boolean inBody = false;
    /** True between a {@code script} start tag and its end tag. */
    private boolean inScript = false;
    /** When true, text nodes that are blank are not recorded. */
    private final boolean skipWhitespace = true;
    private final SortedSet<HtmlConverterPSpan> textSpans = new TreeSet<HtmlConverterPSpan>();
    private final SortedSet<HtmlConverterPSpan> linebreaksFromHtmlTags = new TreeSet<HtmlConverterPSpan>();
    /** Lower-case tag names that produce a line break; may be null, meaning "none". */
    private final Set<String> newlineInducingTags;

    /**
     * Creates a visitor.
     *
     * @param newlineInducingTags lower-case names of tags whose start tag should be recorded
     *        as a line break; {@code null} is treated as an empty set
     */
    public HtmlConverterVisitor(Set<String> newlineInducingTags) {
        this.newlineInducingTags = newlineInducingTags;
    }

    /**
     * Records the text node as a text span when it lies inside the body, outside of a script
     * element and (with whitespace skipping enabled) is not blank.
     *
     * @param node the text node being visited
     */
    @Override
    public void visitStringNode(Text node) {
        super.visitStringNode(node);
        if (!this.inBody || this.inScript) {
            return;
        }
        String text = node.getText();
        if (this.skipWhitespace && StringUtils.isBlank(text)) {
            return;
        }
        this.textSpans.add(new HtmlConverterPSpan(node.getStartPosition(), node.getEndPosition(), text));
    }

    /**
     * Tracks entry into {@code body} / {@code script} and records a line-break span for tags
     * contained in the configured newline-inducing set.
     *
     * @param tag the start tag being visited
     */
    @Override
    public void visitTag(Tag tag) {
        super.visitTag(tag);
        String name = normalizedName(tag);
        if (BODY_TAG.equals(name)) {
            this.inBody = true;
        } else if (SCRIPT_TAG.equals(name)) {
            this.inScript = true;
        }
        // A null tag set simply means that no tag induces a line break.
        if (this.newlineInducingTags != null && this.newlineInducingTags.contains(name)) {
            int begin = tag.getStartPosition();
            // The replacement covers exactly the first character of the tag.
            this.linebreaksFromHtmlTags.add(new HtmlConverterPSpanReplacement(begin, begin + 1, "\n"));
        }
    }

    /**
     * Tracks exit from {@code body} / {@code script}.
     *
     * @param tag the end tag being visited
     */
    @Override
    public void visitEndTag(Tag tag) {
        // Delegate like the other overrides do, so that any state the superclass opened in
        // visitTag can be closed again.
        // NOTE(review): htmlparser's TextExtractingVisitor appears to track PRE handling this
        // way - confirm against the library version in use.
        super.visitEndTag(tag);
        String name = normalizedName(tag);
        if (BODY_TAG.equals(name)) {
            this.inBody = false;
        } else if (SCRIPT_TAG.equals(name) || tag instanceof ScriptTag) {
            this.inScript = false;
        }
    }

    /**
     * Returns the text spans collected so far.
     *
     * @return the internal sorted set itself, not a copy
     */
    public SortedSet<HtmlConverterPSpan> getTextSpans() {
        return this.textSpans;
    }

    /**
     * Returns the line-break spans collected so far.
     *
     * @return the internal sorted set itself, not a copy
     */
    public SortedSet<HtmlConverterPSpan> getLinebreaksFromHtmlTags() {
        return this.linebreaksFromHtmlTags;
    }

    /**
     * Returns the tag name lower-cased and trimmed. Locale.ROOT keeps the result independent
     * of the default locale (e.g. "SCRIPT" would otherwise become "scr\u0131pt" under a
     * Turkish locale and no longer match).
     */
    private static String normalizedName(Tag tag) {
        return tag.getTagName().toLowerCase(Locale.ROOT).trim();
    }
}

