/*
 * Decompiled with CFR 0.152.
 */
package org.apache.solr.client.solrj.io.eval;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.eval.ManyValueWorker;
import org.apache.solr.client.solrj.io.eval.Matrix;
import org.apache.solr.client.solrj.io.eval.RecursiveObjectEvaluator;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;

/**
 * Evaluator behind the {@code termVectors} function: turns a list of document
 * tuples into a matrix of weighted term vectors.
 *
 * <p>Each input tuple must carry a {@code terms} field (a list of strings) and is
 * labelled by its {@code id} field. Terms are filtered by length, by the
 * {@code exclude} substrings, and by document frequency; the surviving terms
 * (in sorted order) become the matrix columns.
 *
 * <p>Supported named parameters:
 * <ul>
 *   <li>{@code minTermLength} - terms shorter than this are dropped (default 3)</li>
 *   <li>{@code minDocFreq} - lower document-frequency bound as a fraction in [0, 1] (default 0.05)</li>
 *   <li>{@code maxDocFreq} - upper document-frequency bound as a fraction in [0, 1] (default 0.5)</li>
 *   <li>{@code exclude} - comma separated substrings; a term containing any of them is dropped</li>
 * </ul>
 */
public class TermVectorsEvaluator
extends RecursiveObjectEvaluator
implements ManyValueWorker {
    protected static final long serialVersionUID = 1L;
    private int minTermLength = 3;
    private double minDocFreq = 0.05;
    private double maxDocFreq = 0.5;
    private String[] excludes;

    /**
     * Reads the named parameters of the expression.
     *
     * @param expression the expression being compiled
     * @param factory    factory used to resolve the expression's named operands
     * @throws IOException           if a named parameter is unknown or a document
     *                               frequency is not a number within [0, 1]
     * @throws NumberFormatException if a numeric parameter cannot be parsed
     */
    public TermVectorsEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
        super(expression, factory);
        List<StreamExpressionNamedParameter> namedParams = factory.getNamedOperands(expression);
        for (StreamExpressionNamedParameter namedParam : namedParams) {
            String name = namedParam.getName();
            if (name.equals("minTermLength")) {
                this.minTermLength = Integer.parseInt(namedParam.getParameter().toString().trim());
            } else if (name.equals("minDocFreq")) {
                this.minDocFreq = parseDocFreq(namedParam.getParameter().toString());
            } else if (name.equals("maxDocFreq")) {
                this.maxDocFreq = parseDocFreq(namedParam.getParameter().toString());
            } else if (name.equals("exclude")) {
                this.excludes = namedParam.getParameter().toString().split(",");
            } else {
                throw new IOException("Unexpected named parameter:" + name);
            }
        }
    }

    /**
     * Builds the term-vector matrix.
     *
     * <p>Cell value for document {@code d} and term {@code t} is
     * {@code sqrt(tf) * (ln((N + 1) / (df + 1)) + 1)}, where {@code tf} is the
     * number of occurrences of {@code t} in {@code d}, {@code df} the number of
     * documents containing {@code t}, and {@code N} the number of documents.
     *
     * @param objects exactly one element: a non-empty {@code List} of {@code Tuple}s
     * @return a {@code Matrix} with one row per tuple (labelled by {@code id}), one
     *         column per retained term, and the retained document frequencies
     *         stored under the {@code docFreqs} attribute
     * @throws IOException if the argument count or type is wrong, the list is
     *                     empty, or a tuple has no {@code terms} field
     */
    @Override
    public Object doWork(Object ... objects) throws IOException {
        if (objects.length != 1) {
            throw new IOException("The termVectors function takes a single positional parameter.");
        }
        List<Tuple> tuples = toTuples(objects[0]);
        int numDocs = tuples.size();

        // Pass 1: document frequencies of the terms that survive the length/exclude filters.
        TreeMap<String, Integer> docFreqs = new TreeMap<String, Integer>();
        List<String> rowLabels = new ArrayList<String>(numDocs);
        for (Tuple tuple : tuples) {
            List<String> terms = termsOf(tuple);
            rowLabels.add(tuple.getString("id"));
            Set<String> seenInDoc = new HashSet<String>();
            for (String term : terms) {
                if (term.length() < this.minTermLength || isExcluded(term)) {
                    continue;
                }
                // add() is false for repeats, so each term counts once per document.
                if (seenInDoc.add(term)) {
                    Integer previous = docFreqs.get(term);
                    docFreqs.put(term, previous == null ? 1 : previous + 1);
                }
            }
        }

        // Drop terms whose document frequency falls outside [min, max].
        int min = (int)((double)numDocs * this.minDocFreq);
        int max = (int)((double)numDocs * this.maxDocFreq);
        Iterator<Map.Entry<String, Integer>> it = docFreqs.entrySet().iterator();
        while (it.hasNext()) {
            int count = it.next().getValue();
            if (count < min || count > max) {
                it.remove();
            }
        }

        // Columns follow the TreeMap's sorted key order.
        List<String> features = new ArrayList<String>(docFreqs.keySet());
        int totalTerms = features.size();

        // The inverse-document-frequency factor depends only on the term, so compute it once.
        double[] idf = new double[totalTerms];
        for (int i = 0; i < totalTerms; ++i) {
            int df = docFreqs.get(features.get(i));
            idf[i] = Math.log((double)(numDocs + 1) / (double)(df + 1)) + 1.0;
        }

        // Pass 2: per-document term frequencies, weighted into one row each.
        double[][] docVec = new double[numDocs][];
        for (int d = 0; d < numDocs; ++d) {
            Map<String, Integer> termFreq = new HashMap<String, Integer>();
            for (String term : termsOf(tuples.get(d))) {
                if (!docFreqs.containsKey(term)) {
                    continue;
                }
                Integer previous = termFreq.get(term);
                termFreq.put(term, previous == null ? 1 : previous + 1);
            }
            double[] termVec = new double[totalTerms];
            for (int i = 0; i < totalTerms; ++i) {
                Integer tf = termFreq.get(features.get(i));
                termVec[i] = Math.sqrt(tf == null ? 0 : tf) * idf[i];
            }
            docVec[d] = termVec;
        }

        Matrix matrix = new Matrix(docVec);
        matrix.setColumnLabels(features);
        matrix.setRowLabels(rowLabels);
        matrix.setAttribute("docFreqs", docFreqs);
        return matrix;
    }

    /** Parses a document-frequency fraction and rejects anything outside [0, 1], including NaN. */
    private static double parseDocFreq(String raw) throws IOException {
        double freq = Double.parseDouble(raw.trim());
        // Positive range test: every comparison with NaN is false, so NaN is rejected here
        // (a "freq < 0 || freq > 1" test would let it through).
        if (!(freq >= 0.0 && freq <= 1.0)) {
            throw new IOException("Doc frequency percentage must be between 0 and 1");
        }
        return freq;
    }

    /** Validates that the parameter is a non-empty list made up only of tuples and returns it typed. */
    private static List<Tuple> toTuples(Object param) throws IOException {
        if (!(param instanceof List)) {
            throw new IOException("The termVectors function expects a list of Tuples as a parameter.");
        }
        List<?> docs = (List<?>)param;
        if (docs.isEmpty()) {
            throw new IOException("Empty list was passed as a parameter to termVectors function.");
        }
        List<Tuple> tuples = new ArrayList<Tuple>(docs.size());
        for (Object doc : docs) {
            // Check every element, not just the first, so a mixed list fails with a clear message.
            if (!(doc instanceof Tuple)) {
                throw new IOException("The termVectors function expects a list of Tuples as a parameter.");
            }
            tuples.add((Tuple)doc);
        }
        return tuples;
    }

    /** Returns the tuple's {@code terms} field, failing if it is absent. */
    @SuppressWarnings("unchecked") // the terms field is expected to hold a list of strings
    private static List<String> termsOf(Tuple tuple) throws IOException {
        Object terms = tuple.get("terms");
        if (terms == null) {
            throw new IOException("The document tuples must contain a terms field");
        }
        return (List<String>)terms;
    }

    /** Tells whether the term contains any of the configured exclude substrings. */
    private boolean isExcluded(String term) {
        if (this.excludes == null) {
            return false;
        }
        for (String exclude : this.excludes) {
            // An empty token (e.g. from "a,,b") is contained in every string; skip it so it
            // does not silently exclude all terms.
            if (!exclude.isEmpty() && term.indexOf(exclude) > -1) {
                return true;
            }
        }
        return false;
    }
}

