/*
 * Decompiled with CFR 0.152.
 */
package kea.filters;

import java.util.Enumeration;
import java.util.Vector;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.Utils;
import weka.filters.Filter;

/**
 * Filter that rewrites the values of selected string attributes as a list of
 * phrases.
 *
 * <p>Each processed string value is replaced by the result of
 * {@link #tokenize(String)}: phrases are separated by a newline character and
 * the word tokens inside a phrase are joined by a single space (or by
 * {@code '-'} / {@code '/'} when the preceding token was directly followed by
 * that character). Non-string attributes, missing values and string attributes
 * outside the selected range are copied through unchanged.
 *
 * <p>Options: {@code -R <range>} selects the attributes, {@code -V} inverts the
 * selection, {@code -P} disallows periods inside tokens.
 */
public class KEAPhraseFilter
extends Filter
implements OptionHandler {
    private static final long serialVersionUID = 1L;

    /** Range of attribute indices whose string values are tokenized. */
    protected Range m_SelectCols = new Range();

    /**
     * When {@code true}, a {@code '.'} is never treated as a token-internal
     * character, so it always terminates the current token.
     */
    protected boolean m_DisallowInternalPeriods = false;

    /**
     * Returns a short description of this filter.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "This filter splits the text contained by the selected string attributes into phrases.";
    }

    /**
     * Lists the command-line options understood by this filter
     * ({@code -R}, {@code -V} and {@code -P}).
     *
     * @return an enumeration of {@link Option} objects
     */
    public Enumeration listOptions() {
        Vector<Option> optionList = new Vector<Option>(3);
        optionList.addElement(new Option("\tSpecify list of attributes to process. First and last are valid\n\tindexes. (default none)", "R", 1, "-R <index1,index2-index4,...>"));
        optionList.addElement(new Option("\tInvert matching sense", "V", 0, "-V"));
        optionList.addElement(new Option("\tDisallow internal periods", "P", 0, "-P"));
        return optionList.elements();
    }

    /**
     * Returns the capabilities of this filter: the inherited capabilities with
     * all attribute types, missing values, a nominal class and "no class"
     * additionally enabled.
     *
     * @return the capabilities object
     */
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.enableAllAttributes();
        result.enable(Capabilities.Capability.MISSING_VALUES);
        result.enable(Capabilities.Capability.NOMINAL_CLASS);
        result.enable(Capabilities.Capability.NO_CLASS);
        return result;
    }

    /**
     * Parses the given option array and configures the filter accordingly.
     *
     * <p>The attribute range is only updated when the value obtained for
     * {@code -R} is non-empty; the {@code -V} and {@code -P} flags are always
     * applied. If an input format has already been set, it is re-applied so
     * that the new settings take effect.
     *
     * @param options the option array to parse
     * @throws Exception if option parsing or re-applying the input format fails
     */
    public void setOptions(String[] options) throws Exception {
        String list = Utils.getOption('R', options);
        if (list.length() != 0) {
            setAttributeIndices(list);
        }
        setInvertSelection(Utils.getFlag('V', options));
        setDisallowInternalPeriods(Utils.getFlag('P', options));
        if (getInputFormat() != null) {
            setInputFormat(getInputFormat());
        }
    }

    /**
     * Returns the current settings as an option array.
     *
     * <p>The array always has four entries; slots that are not needed are
     * filled with empty strings.
     *
     * @return the option array describing the current configuration
     */
    public String[] getOptions() {
        String[] options = new String[4];
        int current = 0;
        if (getInvertSelection()) {
            options[current++] = "-V";
        }
        if (getDisallowInternalPeriods()) {
            options[current++] = "-P";
        }
        if (!getAttributeIndices().equals("")) {
            options[current++] = "-R";
            options[current++] = getAttributeIndices();
        }
        // Pad the unused tail with empty strings so no entry is null.
        while (current < options.length) {
            options[current++] = "";
        }
        return options;
    }

    /**
     * Sets the format of the input instances. The output format is set to the
     * same structure, and the upper bound of the attribute range is set to the
     * index of the last attribute.
     *
     * @param instanceInfo the structure of the incoming instances
     * @return always {@code true}
     * @throws Exception if the superclass rejects the format
     */
    public boolean setInputFormat(Instances instanceInfo) throws Exception {
        super.setInputFormat(instanceInfo);
        setOutputFormat(instanceInfo);
        m_SelectCols.setUpper(instanceInfo.numAttributes() - 1);
        return true;
    }

    /**
     * Converts one instance and pushes the result onto the output queue. At the
     * start of a new batch the queue is reset first.
     *
     * @param instance the instance to filter
     * @return always {@code true}
     * @throws Exception if no input format has been defined or the conversion
     *         fails
     */
    public boolean input(Instance instance) throws Exception {
        if (getInputFormat() == null) {
            throw new Exception("No input instance format defined");
        }
        if (m_NewBatch) {
            resetQueue();
            m_NewBatch = false;
        }
        convertInstance(instance);
        return true;
    }

    /**
     * Marks the end of the current batch.
     *
     * @return {@code true} if there are instances pending output
     * @throws NullPointerException if no input format has been defined
     * @throws Exception declared for compatibility with the overridden method
     */
    public boolean batchFinished() throws Exception {
        if (getInputFormat() == null) {
            throw new NullPointerException("No input instance format defined");
        }
        m_NewBatch = true;
        return numPendingOutput() != 0;
    }

    /**
     * Command-line entry point. Runs the filter in batch mode when the
     * {@code -b} flag is present, otherwise in single-file mode. Any exception
     * is reported on standard output.
     *
     * @param argv the command-line arguments
     */
    public static void main(String[] argv) {
        try {
            if (Utils.getFlag('b', argv)) {
                Filter.batchFilterFile(new KEAPhraseFilter(), argv);
            } else {
                Filter.filterFile(new KEAPhraseFilter(), argv);
            }
        }
        catch (Exception ex) {
            // getMessage() may be null; fall back to toString() so the
            // exception type is still reported instead of a bare "null".
            String message = ex.getMessage();
            System.out.println(message != null ? message : ex.toString());
        }
    }

    /**
     * Builds the filtered copy of an instance and pushes it onto the output
     * queue.
     *
     * <p>Non-string and missing values are copied as they are. String values
     * are re-registered with the corresponding output attribute; those inside
     * the selected range are tokenized first.
     *
     * @param instance the instance to convert
     * @throws Exception if the conversion fails
     */
    private void convertInstance(Instance instance) throws Exception {
        int numAtts = instance.numAttributes();
        double[] instVals = new double[numAtts];
        for (int i = 0; i < numAtts; i++) {
            if (!instance.attribute(i).isString() || instance.isMissing(i)) {
                instVals[i] = instance.value(i);
                continue;
            }
            String text = instance.stringValue(i);
            if (m_SelectCols.isInRange(i)) {
                text = tokenize(text);
            }
            // The stored value of a string attribute is the index returned by
            // addStringValue on the output attribute.
            instVals[i] = getOutputFormat().attribute(i).addStringValue(text);
        }
        Instance inst = new Instance(instance.weight(), instVals);
        inst.setDataset(getOutputFormat());
        push(inst);
    }

    /**
     * Splits a text into phrases of word tokens.
     *
     * <p>A token is a maximal run of letters and digits that may contain
     * {@code @ _ & / -} (and {@code .} unless internal periods are disallowed)
     * when such a character sits directly between two letters/digits, and
     * apostrophes that directly follow a letter/digit. Tokens of one phrase are
     * joined with a space; if the previous token was immediately followed by
     * {@code '-'} or {@code '/'}, that character is used as the joiner instead.
     * A phrase ends (a newline is emitted) when a token is immediately followed
     * by any other non-whitespace character, when a stray non-whitespace
     * character is met between tokens, or when a second newline is seen since
     * the last token.
     *
     * @param str the text to tokenize; must not be {@code null}
     * @return the tokenized text with phrases separated by {@code '\n'}
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public String tokenize(String str) {
        StringBuilder result = new StringBuilder();
        final int length = str.length();
        int pos = 0;
        boolean phraseStart = true;
        boolean seenNewLine = false;
        boolean haveSeenHyphen = false;
        boolean haveSeenSlash = false;
        while (pos < length) {
            // Scan the longest token starting at pos.
            final int tokenStart = pos;
            boolean isWord = false;
            while (pos < length) {
                char ch = str.charAt(pos);
                if (Character.isLetterOrDigit(ch)) {
                    isWord = true;
                } else if (isInternalPunctuation(ch)) {
                    // Only allowed strictly between two letters/digits.
                    if (!isLetterOrDigitAt(str, pos - 1) || !isLetterOrDigitAt(str, pos + 1)) {
                        break;
                    }
                } else if (ch == '\'') {
                    // Only allowed directly after a letter/digit.
                    if (!isLetterOrDigitAt(str, pos - 1)) {
                        break;
                    }
                } else {
                    break;
                }
                ++pos;
            }

            if (isWord) {
                if (!phraseStart) {
                    result.append(haveSeenHyphen ? '-' : haveSeenSlash ? '/' : ' ');
                }
                result.append(str, tokenStart, pos);
                if (pos == length) {
                    break;
                }
                // Classify the character that terminated the token.
                phraseStart = false;
                haveSeenHyphen = false;
                haveSeenSlash = false;
                char next = str.charAt(pos);
                seenNewLine = next == '\n';
                if (Character.isWhitespace(next)) {
                    // Plain separator; the phrase continues.
                } else if (next == '-') {
                    haveSeenHyphen = true;
                } else if (next == '/') {
                    haveSeenSlash = true;
                } else {
                    phraseStart = true;
                    result.append('\n');
                }
                ++pos;
                continue;
            }

            if (pos == length) {
                break;
            }
            // No token here: the character at pos is a separator of some kind.
            char ch = str.charAt(pos);
            if (ch == '\n') {
                // A second newline since the last token closes the phrase.
                if (seenNewLine && !phraseStart) {
                    result.append('\n');
                    phraseStart = true;
                }
                seenNewLine = true;
            } else if (!Character.isWhitespace(ch) && !phraseStart) {
                // Any other non-whitespace character closes the phrase.
                result.append('\n');
                phraseStart = true;
            }
            ++pos;
        }
        return result.toString();
    }

    /**
     * Tells whether the character may appear inside a token when surrounded by
     * letters/digits.
     */
    private boolean isInternalPunctuation(char ch) {
        if (ch == '.') {
            return !m_DisallowInternalPeriods;
        }
        return ch == '@' || ch == '_' || ch == '&' || ch == '/' || ch == '-';
    }

    /**
     * Tells whether {@code index} is a valid position in {@code str} holding a
     * letter or digit.
     */
    private static boolean isLetterOrDigitAt(String str, int index) {
        return index >= 0 && index < str.length() && Character.isLetterOrDigit(str.charAt(index));
    }

    /**
     * Returns the tip text for the invert-selection property.
     *
     * @return the tip text
     */
    public String invertSelectionTipText() {
        return "If set to false, the specified attributes will be processed; If set to true, specified attributes won't be processed.";
    }

    /**
     * Returns whether the attribute selection is inverted.
     *
     * @return the invert flag of the attribute range
     */
    public boolean getInvertSelection() {
        return m_SelectCols.getInvert();
    }

    /**
     * Sets whether the attribute selection is inverted.
     *
     * @param invert the new invert flag of the attribute range
     */
    public void setInvertSelection(boolean invert) {
        m_SelectCols.setInvert(invert);
    }

    /**
     * Returns the tip text for the disallow-internal-periods property.
     *
     * @return the tip text
     */
    public String disallowInternalPeriodsTipText() {
        return "If set to false, internal periods are allowed.";
    }

    /**
     * Returns whether periods inside tokens are disallowed.
     *
     * @return {@code true} if internal periods are disallowed
     */
    public boolean getDisallowInternalPeriods() {
        return m_DisallowInternalPeriods;
    }

    /**
     * Sets whether periods inside tokens are disallowed.
     *
     * @param disallow {@code true} to disallow internal periods
     */
    public void setDisallowInternalPeriods(boolean disallow) {
        m_DisallowInternalPeriods = disallow;
    }

    /**
     * Returns the tip text for the attribute-indices property.
     *
     * @return the tip text
     */
    public String attributeIndicesTipText() {
        return "Specify range of attributes to act on. This is a comma separated list of attribute indices, with \"first\" and \"last\" valid values. Specify an inclusive range with \"-\". E.g: \"first-3,5,6-10,last\".";
    }

    /**
     * Returns the current attribute selection as a range string.
     *
     * @return the range string held by the attribute range
     */
    public String getAttributeIndices() {
        return m_SelectCols.getRanges();
    }

    /**
     * Sets the attributes to process from a range string.
     *
     * @param rangeList the range string, in the format described by
     *        {@link #attributeIndicesTipText()}
     */
    public void setAttributeIndices(String rangeList) {
        m_SelectCols.setRanges(rangeList);
    }

    /**
     * Sets the attributes to process from an array of indices, which is first
     * converted to a range string.
     *
     * @param attributes the attribute indices to select
     */
    public void setAttributeIndicesArray(int[] attributes) {
        setAttributeIndices(Range.indicesToRangeList(attributes));
    }
}

