/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.neural;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.IOException;
import java.io.Serializable;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.ejml.simple.SimpleMatrix;

/**
 * A word-to-vector lookup table backed by a {@code Map<String, SimpleMatrix>}.
 *
 * <p>Vectors can be supplied directly as a map, or loaded from text files in one of two layouts:
 * <ul>
 *   <li>a single file where each line is {@code word v1 v2 ... vn}, or</li>
 *   <li>a word file (one word per line) paired line-by-line with a vector file
 *       (one whitespace-separated vector per line).</li>
 * </ul>
 *
 * <p>While loading from files, several common spellings of the unknown-word token and the
 * sentence boundary tokens {@code <s>} / {@code </s>} are rewritten to the canonical keys
 * {@link #UNKNOWN_WORD}, {@link #START_WORD} and {@link #END_WORD}. Each loaded vector is stored
 * as a column vector (n rows, 1 column).
 *
 * <p>Accessors such as {@link #getWordVectors()}, {@link #values()}, {@link #keySet()} and
 * {@link #entrySet()} expose the backing map (or views of it) directly, without copying.
 */
public class Embedding
implements Serializable {
    private static final long serialVersionUID = 4925779982530239054L;
    private static final Redwood.RedwoodChannels log = Redwood.channels(Embedding.class);

    /** Backing map from word to its vector. */
    private Map<String, SimpleMatrix> wordVectors;

    /** Number of elements in each vector; a value {@code <= 0} means "detect while loading". */
    private int embeddingSize;

    /** Canonical key for the sentence-start token. */
    static final String START_WORD = "*START*";
    /** Canonical key for the sentence-end token. */
    static final String END_WORD = "*END*";
    /** Canonical key for the unknown-word token; also the fallback used by {@link #get(String)}. */
    static final String UNKNOWN_WORD = "*UNK*";

    // The remaining keys and patterns are not referenced inside this class; they are
    // package-visible, presumably for use by other classes in this package.
    static final String UNKNOWN_NUMBER = "*NUM*";
    static final String UNKNOWN_CAPS = "*CAPS*";
    static final String UNKNOWN_CHINESE_YEAR = "*ZH_YEAR*";
    static final String UNKNOWN_CHINESE_NUMBER = "*ZH_NUM*";
    static final String UNKNOWN_CHINESE_PERCENT = "*ZH_PERCENT*";

    /** An optional minus sign, one ASCII digit, then any run of digits, hyphens, commas, periods or colons. */
    static final Pattern NUMBER_PATTERN = Pattern.compile("-?[0-9][-0-9,.:]*");
    /** ASCII letters only, containing at least one upper-case letter. */
    static final Pattern CAPS_PATTERN = Pattern.compile("[a-zA-Z]*[A-Z][a-zA-Z]*");
    /** Exactly four numeral characters from a fixed set, followed by one fixed suffix character. */
    static final Pattern CHINESE_YEAR_PATTERN = Pattern.compile("[\u3007\u96f6\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\uff10\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\uff19]{4}+\u5e74");
    /** One or more groups of numeral characters, each group optionally followed by one of two marker characters. */
    static final Pattern CHINESE_NUMBER_PATTERN = Pattern.compile("(?:[\u3007\uff10\u96f6\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\uff10\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\uff19\u5341\u767e\u4e07\u5343\u4ebf]+[\u70b9\u591a]?)+");
    /** A fixed three-character prefix followed by one or more numeral characters. */
    static final Pattern CHINESE_PERCENT_PATTERN = Pattern.compile("\u767e\u5206\u4e4b[\u3007\uff10\u96f6\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\uff10\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\uff19\u5341\u70b9]+");
    /** Any string that contains the substring {@code DG}. */
    static final Pattern DG_PATTERN = Pattern.compile(".*DG.*");

    /**
     * Alternative spellings of the unknown-word token that are recognized on input.
     * The order matters for {@link #getEmbeddingSize(Map)}: when several aliases are present,
     * the one listed last takes precedence.
     */
    private static final String[] UNKNOWN_ALIASES = {"UNK", "UUUNKKK", "UNKNOWN", "*UNKNOWN*", "<unk>"};

    /**
     * Wraps an existing word-vector map. The map is used as-is (not copied).
     *
     * <p>If the map has no {@link #UNKNOWN_WORD} entry but contains a recognized alias of it,
     * an {@link #UNKNOWN_WORD} entry pointing at the same vector is added to the given map.
     *
     * @param wordVectors map from word to vector
     * @throws RuntimeException if the map contains no recognized form of the unknown-word token
     */
    public Embedding(Map<String, SimpleMatrix> wordVectors) {
        this.wordVectors = wordVectors;
        this.embeddingSize = Embedding.getEmbeddingSize(wordVectors);
    }

    /**
     * Loads vectors from a single {@code word v1 ... vn} file, detecting the vector size
     * from the first entry.
     *
     * @param wordVectorFile location of the combined word/vector file
     */
    public Embedding(String wordVectorFile) {
        this(wordVectorFile, 0);
    }

    /**
     * Loads vectors from a single {@code word v1 ... vn} file.
     *
     * @param wordVectorFile location of the combined word/vector file
     * @param embeddingSize  number of elements to keep per vector; longer vectors are truncated,
     *                       and a value {@code <= 0} means "detect from the first entry"
     * @throws RuntimeException if an entry has fewer elements than {@code embeddingSize}
     */
    public Embedding(String wordVectorFile, int embeddingSize) {
        this.wordVectors = Generics.newHashMap();
        this.embeddingSize = embeddingSize;
        this.loadWordVectors(wordVectorFile);
    }

    /**
     * Loads vectors from a word file and a parallel vector file, detecting the vector size
     * from the first vector.
     *
     * @param wordFile   location of the file with one word per line
     * @param vectorFile location of the file with one vector per line
     */
    public Embedding(String wordFile, String vectorFile) {
        this(wordFile, vectorFile, 0);
    }

    /**
     * Loads vectors from a word file and a parallel vector file.
     *
     * @param wordFile      location of the file with one word per line
     * @param vectorFile    location of the file with one vector per line
     * @param embeddingSize number of elements to keep per vector; longer vectors are truncated,
     *                      and a value {@code <= 0} means "detect from the first vector"
     * @throws RuntimeException if a vector has fewer elements than {@code embeddingSize}
     * @throws java.util.NoSuchElementException if the word file has fewer lines than the vector file
     */
    public Embedding(String wordFile, String vectorFile, int embeddingSize) {
        this.wordVectors = Generics.newHashMap();
        this.embeddingSize = embeddingSize;
        this.loadWordVectors(wordFile, vectorFile);
    }

    /**
     * Reads a file whose lines have the form {@code word v1 v2 ... vn} into {@link #wordVectors}.
     * Blank lines are skipped, and surrounding whitespace on a line is ignored.
     */
    private void loadWordVectors(String wordVectorFile) {
        log.info("# Loading embedding ...\n  word vector file = " + wordVectorFile);
        boolean warned = false;
        int numWords = 0;
        for (String rawLine : IOUtils.readLines(wordVectorFile, "utf-8")) {
            // Trim first: splitting a line with leading whitespace would otherwise produce an
            // empty first token that gets mistaken for the word.
            String line = rawLine.trim();
            if (line.isEmpty()) {
                continue;
            }
            String[] lineSplit = line.split("\\s+");
            String word = Embedding.canonicalizeWord(lineSplit[0]);
            // Token 0 is the word; everything after it is a vector element.
            int dimOfWords = lineSplit.length - 1;
            int dimToUse = this.reconcileDimension(dimOfWords);
            if (dimOfWords > dimToUse && !warned) {
                warned = true;
                log.info("WARNING: Dimensionality of numHid parameter and word vectors do not match, deleting word vector dimensions to fit!");
            }
            this.wordVectors.put(word, Embedding.parseColumnVector(lineSplit, 1, dimToUse));
            ++numWords;
        }
        log.info("  num words = " + numWords);
    }

    /**
     * Reads a word file and a vector file in lock-step into {@link #wordVectors}: the i-th line
     * of {@code wordFile} names the vector on the i-th line of {@code vectorFile}.
     *
     * @throws java.util.NoSuchElementException if the word file runs out before the vector file
     */
    private void loadWordVectors(String wordFile, String vectorFile) {
        log.info("# Loading embedding ...\n  word file = " + wordFile + "\n  vector file = " + vectorFile);
        boolean warned = false;
        int numWords = 0;
        Iterator<String> wordIterator = IOUtils.readLines(wordFile, "utf-8").iterator();
        for (String rawLine : IOUtils.readLines(vectorFile, "utf-8")) {
            if (!wordIterator.hasNext()) {
                // Same exception type an unchecked next() would raise, but with enough context
                // to locate the mismatch.
                throw new java.util.NoSuchElementException("Word file " + wordFile + " has fewer entries than vector file " + vectorFile + " (ran out after " + numWords + " words)");
            }
            String word = Embedding.canonicalizeWord(wordIterator.next());
            // Trim first so leading whitespace does not produce an empty first element.
            String[] lineSplit = rawLine.trim().split("\\s+");
            int dimOfWords = lineSplit.length;
            int dimToUse = this.reconcileDimension(dimOfWords);
            if (dimOfWords > dimToUse && !warned) {
                warned = true;
                log.info("WARNING: Dimensionality of numHid parameter and word vectors do not match, deleting word vector dimensions to fit!");
            }
            this.wordVectors.put(word, Embedding.parseColumnVector(lineSplit, 0, dimToUse));
            ++numWords;
        }
        // Surplus words have no vector; report them instead of dropping them silently.
        int unmatchedWords = 0;
        while (wordIterator.hasNext()) {
            wordIterator.next();
            ++unmatchedWords;
        }
        if (unmatchedWords > 0) {
            log.info("WARNING: word file has " + unmatchedWords + " more entries than the vector file; the extra words were ignored");
        }
        log.info("  num words = " + numWords);
    }

    /**
     * Maps the recognized spellings of the unknown-word and sentence-boundary tokens to their
     * canonical keys; any other word is returned unchanged.
     */
    private static String canonicalizeWord(String word) {
        for (String alias : UNKNOWN_ALIASES) {
            if (word.equals(alias)) {
                return UNKNOWN_WORD;
            }
        }
        if (word.equals("<s>")) {
            return START_WORD;
        }
        if (word.equals("</s>")) {
            return END_WORD;
        }
        return word;
    }

    /**
     * Reconciles the element count found on one line with {@link #embeddingSize}.
     * If no size has been fixed yet ({@code embeddingSize <= 0}), the found count becomes the size.
     *
     * @param dimOfWords number of vector elements present on the current line
     * @return the number of elements to read from the line, which is the current
     *         {@link #embeddingSize}
     * @throws RuntimeException if the line has fewer elements than {@link #embeddingSize}
     */
    private int reconcileDimension(int dimOfWords) {
        if (this.embeddingSize <= 0) {
            this.embeddingSize = dimOfWords;
            log.info("  detected embedding size = " + dimOfWords);
        }
        if (dimOfWords < this.embeddingSize) {
            throw new RuntimeException("Word vectors file has dimension too small for requested numHid of " + this.embeddingSize);
        }
        return this.embeddingSize;
    }

    /**
     * Parses {@code dim} consecutive tokens, starting at index {@code offset}, into a
     * column vector with {@code dim} rows and one column.
     *
     * @throws NumberFormatException if a token is not a parsable double
     */
    private static SimpleMatrix parseColumnVector(String[] tokens, int offset, int dim) {
        double[][] vec = new double[dim][1];
        for (int i = 0; i < dim; ++i) {
            vec[i][0] = Double.parseDouble(tokens[offset + i]);
        }
        return new SimpleMatrix(vec);
    }

    /**
     * Serializes the backing word-vector map (not this {@code Embedding} object) to a file.
     *
     * @param filename destination path
     * @throws IOException if writing fails
     */
    public void writeToFile(String filename) throws IOException {
        IOUtils.writeObjectToFile(this.wordVectors, filename);
    }

    /** @return the number of entries in the backing map */
    public int size() {
        return this.wordVectors.size();
    }

    /** @return the backing map's collection of vectors */
    public Collection<SimpleMatrix> values() {
        return this.wordVectors.values();
    }

    /** @return the backing map's key set */
    public Set<String> keySet() {
        return this.wordVectors.keySet();
    }

    /** @return the backing map's entry set */
    public Set<Map.Entry<String, SimpleMatrix>> entrySet() {
        return this.wordVectors.entrySet();
    }

    /**
     * Looks up the vector for a word, falling back to the unknown-word vector.
     *
     * @param word the word to look up
     * @return the value stored for {@code word} if the map contains that key; otherwise the
     *         value stored for {@link #UNKNOWN_WORD}, which is {@code null} if that is absent too
     */
    public SimpleMatrix get(String word) {
        if (this.wordVectors.containsKey(word)) {
            return this.wordVectors.get(word);
        }
        return this.wordVectors.get(UNKNOWN_WORD);
    }

    /**
     * @param word the word to test
     * @return {@code true} if the backing map has an entry for exactly this word
     *         (no unknown-word fallback is applied)
     */
    public boolean containsWord(String word) {
        return this.wordVectors.containsKey(word);
    }

    /** @return the vector stored under {@link #START_WORD}, or {@code null} if there is none */
    public SimpleMatrix getStartWordVector() {
        return this.wordVectors.get(START_WORD);
    }

    /** @return the vector stored under {@link #END_WORD}, or {@code null} if there is none */
    public SimpleMatrix getEndWordVector() {
        return this.wordVectors.get(END_WORD);
    }

    /** @return the vector stored under {@link #UNKNOWN_WORD}, or {@code null} if there is none */
    public SimpleMatrix getUnknownWordVector() {
        return this.wordVectors.get(UNKNOWN_WORD);
    }

    /** @return the backing map itself (not a copy) */
    public Map<String, SimpleMatrix> getWordVectors() {
        return this.wordVectors;
    }

    /** @return the number of elements per vector as currently recorded */
    public int getEmbeddingSize() {
        return this.embeddingSize;
    }

    /**
     * Replaces the backing map and recomputes the embedding size from it.
     * The same unknown-word handling as the map constructor applies, including the possible
     * insertion of an {@link #UNKNOWN_WORD} entry into the given map.
     *
     * @param wordVectors the new map from word to vector; used as-is (not copied)
     * @throws RuntimeException if the map contains no recognized form of the unknown-word token
     */
    public void setWordVectors(Map<String, SimpleMatrix> wordVectors) {
        this.wordVectors = wordVectors;
        this.embeddingSize = Embedding.getEmbeddingSize(wordVectors);
    }

    /**
     * Determines the vector size of a map from its unknown-word vector.
     *
     * <p>If the map lacks {@link #UNKNOWN_WORD} but has one of the recognized aliases, the
     * alias's vector is additionally registered under {@link #UNKNOWN_WORD} in the given map.
     *
     * @return the number of elements of the unknown-word vector
     * @throws RuntimeException if neither {@link #UNKNOWN_WORD} nor any alias is present
     */
    private static int getEmbeddingSize(Map<String, SimpleMatrix> wordVectors) {
        if (!wordVectors.containsKey(UNKNOWN_WORD)) {
            String unkStr = "";
            // No early exit: a later alias deliberately overrides an earlier one.
            for (String alias : UNKNOWN_ALIASES) {
                if (wordVectors.containsKey(alias)) {
                    unkStr = alias;
                }
            }
            if (unkStr.isEmpty()) {
                throw new RuntimeException("! wordVectors used to initialize Embedding doesn't contain any recognized form of *UNK*");
            }
            wordVectors.put(UNKNOWN_WORD, wordVectors.get(unkStr));
        }
        return wordVectors.get(UNKNOWN_WORD).getNumElements();
    }
}

