/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.appscan.ifa.tfidf;

import com.ibm.appscan.ifa.tfidf.TfidfDictionary;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;

/**
 * Computes term-frequency / inverse-document-frequency (TF-IDF) weights and cosine
 * similarities for documents, using the corpus statistics held by a {@link TfidfDictionary}.
 *
 * <p>A document is a plain string that is tokenised by splitting on a single space
 * character. As a consequence of {@link String#split(String)}, consecutive spaces yield
 * empty-string tokens, and a document that consists only of spaces yields no tokens at all.
 * Methods in this class treat such a token-less document as having a frequency of zero for
 * every term instead of producing {@code NaN}.
 */
public class TfidfCalculator
implements Serializable {
    private static final long serialVersionUID = -7181412390503371659L;

    /** Source of corpus statistics (term counts, document count, known documents). */
    private final TfidfDictionary m_dictionary;

    /**
     * Creates a calculator backed by the given dictionary.
     *
     * @param dictionary the dictionary that supplies term counts and the known documents
     */
    public TfidfCalculator(TfidfDictionary dictionary) {
        this.m_dictionary = dictionary;
    }

    /**
     * Returns the dictionary this calculator reads its statistics from.
     *
     * @return the dictionary passed to the constructor
     */
    public TfidfDictionary getDictionary() {
        return this.m_dictionary;
    }

    /**
     * Computes the TF-IDF weight of a term within a document as
     * {@link #calcTermFreqNorm(String, String)} multiplied by {@link #calcIDF(String)}.
     *
     * @param term the term to weigh
     * @param doc  the space-delimited document
     * @return the TF-IDF weight
     */
    public double calcTFIDF(String term, String doc) {
        return this.calcTermFreqNorm(term, doc) * this.calcIDF(term);
    }

    /**
     * Computes the cosine similarity between the TF-IDF vectors of two documents.
     *
     * @param doc1 the first space-delimited document
     * @param doc2 the second space-delimited document
     * @return the dot product of the two vectors divided by the product of their Euclidean
     *         norms, or {@code 0.0} when that product is zero
     */
    public double calcCosineSimilarity(String doc1, String doc2) {
        double[] vector1 = this.convertDocToVector(doc1);
        double[] vector2 = this.convertDocToVector(doc2);
        return this.cosineSimilarity(vector1, vector2);
    }

    /**
     * Prints, to {@code System.out}, the cosine similarity between the given document and
     * every key of the dictionary's document collection. For each key two lines are written:
     * {@code doc + " : " + key}, followed by the similarity value.
     *
     * @param doc the space-delimited document to compare against the dictionary's documents
     */
    public void calcAllCosineSimilarities(String doc) {
        // The query vector does not depend on the loop variable, so build it only once.
        double[] docVector = this.convertDocToVector(doc);
        for (String s : this.m_dictionary.getDocuments().keySet()) {
            System.out.println(doc + " : " + s);
            System.out.println(this.cosineSimilarity(docVector, this.convertDocToVector(s)));
        }
    }

    /**
     * Finds the highest cosine similarity between the given document and the keys of the
     * dictionary's document collection, writing progress lines to {@code System.out}.
     *
     * <p>The search stops early as soon as a new best similarity above {@code 0.99} is found.
     *
     * @param doc the space-delimited document to compare
     * @return the highest similarity seen, or {@code 0.0} if no similarity exceeded zero
     */
    public double calcHighestCosineSimilarity(String doc) {
        double best = 0.0;
        System.out.println("Beginning:" + doc);
        // The query vector does not depend on the loop variable, so build it only once.
        double[] docVector = this.convertDocToVector(doc);
        for (String s : this.m_dictionary.getDocuments().keySet()) {
            System.out.println(s);
            double current = this.cosineSimilarity(docVector, this.convertDocToVector(s));
            if (current > best) {
                best = current;
                if (best > 0.99) {
                    // Close enough to a perfect match; skip the remaining documents.
                    return best;
                }
            }
            System.out.println(current + ":" + s);
        }
        return best;
    }

    /**
     * Computes the mean cosine similarity between the given document and every key of the
     * dictionary's document collection.
     *
     * @param doc1 the space-delimited document to compare
     * @return the mean similarity, or {@code 0.0} when the dictionary holds no documents
     */
    public double calcAverageCosineSimilarity(String doc1) {
        double[] docVector = this.convertDocToVector(doc1);
        double total = 0.0;
        int compared = 0;
        for (String s : this.m_dictionary.getDocuments().keySet()) {
            total += this.cosineSimilarity(docVector, this.convertDocToVector(s));
            ++compared;
        }
        if (compared == 0) {
            // Avoid 0.0 / 0, which would yield NaN for an empty corpus.
            return 0.0;
        }
        return total / (double)compared;
    }

    /**
     * Computes the dot product of two vectors.
     *
     * <p>If the vectors differ in length, the components missing from the shorter vector are
     * treated as zero, so only the shared leading components contribute.
     *
     * @param vector1 the first vector
     * @param vector2 the second vector
     * @return the sum of the pairwise products
     */
    public double calcDotProduct(double[] vector1, double[] vector2) {
        int shared = Math.min(vector1.length, vector2.length);
        double result = 0.0;
        for (int i = 0; i < shared; ++i) {
            result += vector1[i] * vector2[i];
        }
        return result;
    }

    /**
     * Computes the product of the Euclidean norms of two vectors. Each norm is taken over the
     * full length of its own vector, so the vectors need not have equal lengths.
     *
     * @param vector1 the first vector
     * @param vector2 the second vector
     * @return {@code sqrt(sum(vector1[i]^2)) * sqrt(sum(vector2[i]^2))}
     */
    public double calcEuclidianNormProduct(double[] vector1, double[] vector2) {
        return Math.sqrt(TfidfCalculator.sumOfSquares(vector1)) * Math.sqrt(TfidfCalculator.sumOfSquares(vector2));
    }

    /**
     * Converts a document into a vector of TF-IDF weights with one component per term returned
     * by {@link #getTermList()}, in that list's order.
     *
     * @param doc the space-delimited document
     * @return a new array whose i-th element is the TF-IDF weight of the i-th term in the document
     */
    public double[] convertDocToVector(String doc) {
        // Snapshot the term list and tokenise the document once instead of once per term.
        List<String> terms = this.getTermList();
        String[] tokens = TfidfCalculator.tokenize(doc);
        HashMap<String, Integer> counts = TfidfCalculator.countTokens(tokens);
        double[] vector = new double[terms.size()];
        for (int i = 0; i < vector.length; ++i) {
            vector[i] = this.weigh(terms.get(i), counts, tokens.length);
        }
        return vector;
    }

    /**
     * Returns the terms known to the dictionary.
     *
     * @return a new list containing the keys of the dictionary's term-count collection
     */
    public List<String> getTermList() {
        return new ArrayList<String>(this.m_dictionary.getTermCounts().keySet());
    }

    /**
     * Computes the binary term frequency.
     *
     * @param term the term to look for
     * @param doc  the space-delimited document
     * @return {@code 1} if the term is one of the document's tokens, otherwise {@code 0}
     */
    public int calcTermFreqBinary(String term, String doc) {
        if (this.docContainsTerm(term, doc)) {
            return 1;
        }
        return 0;
    }

    /**
     * Computes the raw term frequency.
     *
     * @param term the term to count
     * @param doc  the space-delimited document
     * @return the number of tokens in the document that equal the term
     */
    public int calcTermFreqRaw(String term, String doc) {
        return Collections.frequency(Arrays.asList(TfidfCalculator.tokenize(doc)), term);
    }

    /**
     * Computes the term frequency normalised by the document's token count.
     *
     * @param term the term to count
     * @param doc  the space-delimited document
     * @return the raw frequency divided by the number of tokens, or {@code 0.0} when the
     *         document has no tokens
     */
    public double calcTermFreqNorm(String term, String doc) {
        String[] tokens = TfidfCalculator.tokenize(doc);
        if (tokens.length == 0) {
            // A document made only of spaces splits into zero tokens; avoid 0.0 / 0 (NaN).
            return 0.0;
        }
        return (double)Collections.frequency(Arrays.asList(tokens), term) / (double)tokens.length;
    }

    /**
     * Computes the log-normalised term frequency.
     *
     * @param term the term to count
     * @param doc  the space-delimited document
     * @return {@code 1 + ln(rawFrequency)} when the term occurs in the document, otherwise {@code 0.0}
     */
    public double calcTermFreqLogNorm(String term, String doc) {
        int raw = this.calcTermFreqRaw(term, doc);
        if (raw == 0) {
            return 0.0;
        }
        return 1.0 + Math.log(raw);
    }

    /**
     * Computes the double-normalised term frequency
     * {@code k + (1 - k) * rawFrequency / maxRawFrequency}, where the maximum is taken over
     * all tokens of the document.
     *
     * @param term the term to count
     * @param doc  the space-delimited document
     * @param k    the smoothing constant
     * @return the double-normalised frequency, or {@code k} when the document has no tokens
     */
    public double calcTermFreqDoubleNorm(String term, String doc, double k) {
        double max = this.calcMaxRawFrequency(doc);
        if (max == 0.0) {
            // No tokens at all: the frequency ratio is taken as zero rather than 0 / 0 (NaN).
            return k;
        }
        return k + (1.0 - k) * (double)this.calcTermFreqRaw(term, doc) / max;
    }

    /**
     * Computes the inverse document frequency of a term from the dictionary's statistics.
     *
     * @param term the term to look up
     * @return {@code 1 + ln(numDocuments / termCount)}, or {@code 0.0} when the dictionary's
     *         count for the term is zero
     */
    public double calcIDF(String term) {
        // Look the count up once; it is needed for both the guard and the ratio.
        double termCount = this.m_dictionary.getTermCount(term);
        if (termCount == 0.0) {
            return 0.0;
        }
        return 1.0 + Math.log((double)this.m_dictionary.getNumDocuments() / termCount);
    }

    /**
     * Computes the TF-IDF weight of every distinct token of a document.
     *
     * @param doc the space-delimited document
     * @return a new map from each distinct token to its TF-IDF weight within the document
     */
    public HashMap<String, Double> calcAllTFIDF(String doc) {
        String[] tokens = TfidfCalculator.tokenize(doc);
        HashMap<String, Integer> counts = TfidfCalculator.countTokens(tokens);
        HashMap<String, Double> result = new HashMap<String, Double>();
        for (String s : tokens) {
            result.put(s, this.weigh(s, counts, tokens.length));
        }
        return result;
    }

    /**
     * Sums the TF-IDF weights of the distinct tokens of a document.
     *
     * @param doc the space-delimited document
     * @return the sum of the values of {@link #calcAllTFIDF(String)}
     */
    public Double calcSumAllTFIDF(String doc) {
        double sum = 0.0;
        for (Double d : this.calcAllTFIDF(doc).values()) {
            sum += d.doubleValue();
        }
        return sum;
    }

    /**
     * Divides the summed TF-IDF weight of a document by its token count.
     *
     * <p>Note that the sum covers each distinct token once, while the divisor counts every
     * token, including repeats.
     *
     * @param doc the space-delimited document
     * @return {@link #calcSumAllTFIDF(String)} divided by the number of tokens, or {@code 0.0}
     *         when the document has no tokens
     */
    public Double calcAverageTFIDF(String doc) {
        int tokenCount = TfidfCalculator.tokenize(doc).length;
        if (tokenCount == 0) {
            // Avoid 0.0 / 0, which would yield NaN for a document made only of spaces.
            return 0.0;
        }
        return this.calcSumAllTFIDF(doc) / (double)tokenCount;
    }

    /**
     * Finds the token with the highest TF-IDF weight in a document. On ties the token that
     * appears first wins.
     *
     * @param doc the space-delimited document
     * @return the highest-weighted token, or {@code null} when no token has a weight above zero
     */
    public String calcHighestTFIDF(String doc) {
        String[] tokens = TfidfCalculator.tokenize(doc);
        HashMap<String, Integer> counts = TfidfCalculator.countTokens(tokens);
        String result = null;
        double highest = 0.0;
        for (String s : tokens) {
            double current = this.weigh(s, counts, tokens.length);
            if (current > highest) {
                result = s;
                highest = current;
            }
        }
        return result;
    }

    /**
     * Returns the raw frequency of the most frequent token of a document.
     *
     * @param doc the space-delimited document
     * @return the highest token count, or {@code 0.0} when the document has no tokens
     */
    private double calcMaxRawFrequency(String doc) {
        double max = 0.0;
        for (Integer count : TfidfCalculator.countTokens(TfidfCalculator.tokenize(doc)).values()) {
            if (count.intValue() > max) {
                max = count.intValue();
            }
        }
        return max;
    }

    /**
     * Tells whether a term is one of the tokens of a document.
     *
     * @param term the term to look for
     * @param doc  the space-delimited document
     * @return {@code true} if some token equals the term
     */
    private boolean docContainsTerm(String term, String doc) {
        return Arrays.asList(TfidfCalculator.tokenize(doc)).contains(term);
    }

    /** Computes the cosine similarity of two already-built vectors; 0.0 when a norm is zero. */
    private double cosineSimilarity(double[] vector1, double[] vector2) {
        double euclidianNormProduct = this.calcEuclidianNormProduct(vector1, vector2);
        if (euclidianNormProduct == 0.0) {
            return 0.0;
        }
        return this.calcDotProduct(vector1, vector2) / euclidianNormProduct;
    }

    /** Computes the TF-IDF weight of a term from a precomputed token-frequency table. */
    private double weigh(String term, HashMap<String, Integer> counts, int tokenCount) {
        if (tokenCount == 0) {
            return 0.0;
        }
        Integer occurrences = counts.get(term);
        int raw = occurrences == null ? 0 : occurrences.intValue();
        return (double)raw / (double)tokenCount * this.calcIDF(term);
    }

    /** Splits a document into tokens on single space characters. */
    private static String[] tokenize(String doc) {
        return doc.split(" ");
    }

    /** Builds a table mapping each distinct token to the number of times it occurs. */
    private static HashMap<String, Integer> countTokens(String[] tokens) {
        HashMap<String, Integer> counts = new HashMap<String, Integer>();
        for (String token : tokens) {
            Integer seen = counts.get(token);
            counts.put(token, seen == null ? 1 : seen.intValue() + 1);
        }
        return counts;
    }

    /** Sums the squares of the components of a vector. */
    private static double sumOfSquares(double[] vector) {
        double sum = 0.0;
        for (double component : vector) {
            sum += component * component;
        }
        return sum;
    }
}

