/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysds.runtime.transform.tokenize;

import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.transform.tokenize.Tokenizer;
import org.apache.sysds.runtime.transform.tokenize.applier.TokenizerApplier;
import org.apache.sysds.runtime.transform.tokenize.applier.TokenizerApplierCount;
import org.apache.sysds.runtime.transform.tokenize.applier.TokenizerApplierHash;
import org.apache.sysds.runtime.transform.tokenize.applier.TokenizerApplierPosition;
import org.apache.sysds.runtime.transform.tokenize.builder.TokenizerBuilderNgram;
import org.apache.sysds.runtime.transform.tokenize.builder.TokenizerBuilderWhitespaceSplit;
import org.apache.wink.json4j.JSONArray;
import org.apache.wink.json4j.JSONObject;

/**
 * Factory that assembles a {@link Tokenizer} from a JSON specification string.
 *
 * <p>The specification selects a tokenization algorithm (the builder) and an
 * output representation (the applier); both are combined into one
 * {@link Tokenizer} instance.
 */
public class TokenizerFactory {
    /**
     * Parses {@code spec} as JSON and creates the tokenizer it describes.
     *
     * <p>Keys read from the specification:
     * <ul>
     *   <li>{@code "algo"} (required): {@code "split"} or {@code "ngram"}</li>
     *   <li>{@code "algo_params"} (optional): JSON object handed to the builder,
     *       {@code null} when the key is absent</li>
     *   <li>{@code "out"} (required): {@code "count"}, {@code "position"} or {@code "hash"}</li>
     *   <li>{@code "out_params"} (optional): JSON object handed to the count and hash
     *       appliers, {@code null} when the key is absent</li>
     *   <li>{@code "tokenize_col"} (required): integer passed to the builder</li>
     *   <li>{@code "id_cols"} (required): JSON array of integers passed to the builder;
     *       its length is passed to the applier</li>
     *   <li>{@code "format_wide"} (optional): boolean, {@code false} when absent</li>
     *   <li>{@code "apply_padding"} (optional): boolean, {@code false} when absent</li>
     * </ul>
     *
     * @param spec      JSON specification of the tokenizer
     * @param maxTokens value forwarded unchanged to the selected applier
     * @return the tokenizer built from the selected builder and applier
     * @throws DMLRuntimeException wrapping any exception raised while parsing the
     *         specification or constructing the components, including the
     *         {@link IllegalArgumentException} raised for an unsupported
     *         {@code "algo"} or {@code "out"} value
     */
    public static Tokenizer createTokenizer(String spec, int maxTokens) {
        try {
            JSONObject json = new JSONObject(spec);

            // Algorithm selection and its optional parameter object.
            String algo = json.getString("algo");
            JSONObject algoParams = json.has("algo_params") ? json.getJSONObject("algo_params") : null;

            // Output representation and its optional parameter object.
            String out = json.getString("out");
            JSONObject outParams = json.has("out_params") ? json.getJSONObject("out_params") : null;

            // Column configuration.
            int tokenizeCol = json.getInt("tokenize_col");
            JSONArray idColsJson = json.getJSONArray("id_cols");
            int[] idCols = new int[idColsJson.length()];
            for (int pos = 0; pos < idCols.length; pos++) {
                idCols[pos] = idColsJson.getInt(pos);
            }

            // Optional flags; both are false unless present in the specification.
            boolean wideFormat = json.has("format_wide") && json.getBoolean("format_wide");
            boolean applyPadding = json.has("apply_padding") && json.getBoolean("apply_padding");

            // The builder is resolved before the applier, so an unsupported
            // algorithm is reported ahead of an unsupported output representation.
            TokenizerBuilderWhitespaceSplit builder = selectBuilder(algo, idCols, tokenizeCol, algoParams);
            TokenizerApplier applier =
                selectApplier(out, idCols.length, maxTokens, wideFormat, applyPadding, outParams);
            return new Tokenizer(builder, applier);
        }
        catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }
    }

    /**
     * Instantiates the builder registered for the given algorithm name.
     *
     * <p>Declared with a broad {@code throws} clause because the only caller
     * wraps every failure in a {@link DMLRuntimeException}.
     */
    private static TokenizerBuilderWhitespaceSplit selectBuilder(String algo, int[] idCols,
            int tokenizeCol, JSONObject algoParams) throws Exception {
        switch (algo) {
            case "split":
                return new TokenizerBuilderWhitespaceSplit(idCols, tokenizeCol, algoParams);
            case "ngram":
                return new TokenizerBuilderNgram(idCols, tokenizeCol, algoParams);
            default:
                throw new IllegalArgumentException("Algorithm {algo=" + algo + "} is not supported.");
        }
    }

    /**
     * Instantiates the applier registered for the given output representation name.
     *
     * <p>Declared with a broad {@code throws} clause because the only caller
     * wraps every failure in a {@link DMLRuntimeException}.
     */
    private static TokenizerApplier selectApplier(String out, int numIdCols, int maxTokens,
            boolean wideFormat, boolean applyPadding, JSONObject outParams) throws Exception {
        switch (out) {
            case "count":
                return new TokenizerApplierCount(numIdCols, maxTokens, wideFormat, applyPadding, outParams);
            case "position":
                // The position applier is constructed without the out_params object.
                return new TokenizerApplierPosition(numIdCols, maxTokens, wideFormat, applyPadding);
            case "hash":
                return new TokenizerApplierHash(numIdCols, maxTokens, wideFormat, applyPadding, outParams);
            default:
                throw new IllegalArgumentException("Output representation {out=" + out + "} is not supported.");
        }
    }
}

