package be.ac.vub.bsb.parsers.edwards;

import be.ac.ulb.bigre.pathwayinference.core.core.PathwayinferenceConstants;
import be.ac.ulb.bigre.pathwayinference.core.util.DiverseTools;
import be.ac.ulb.mlg.utils.Discretizer;
import be.ac.ulb.mlg.utils.discretizer.UniformFrequencyDiscretizer;
import be.ac.ulb.mlg.utils.measure.BrayCurtis;
import be.ac.ulb.mlg.utils.measure.BrownCorrelation;
import be.ac.ulb.mlg.utils.measure.Euclidean;
import be.ac.ulb.mlg.utils.measure.Hellinger;
import be.ac.ulb.mlg.utils.measure.Kendall;
import be.ac.ulb.mlg.utils.measure.KullbackLeibler;
import be.ac.ulb.mlg.utils.measure.MutualInformation;
import be.ac.ulb.mlg.utils.measure.Pearson;
import be.ac.ulb.mlg.utils.measure.Spearman;
import be.ac.vub.bsb.cooccurrence.cmd.OptionNames;
import be.ac.vub.bsb.cooccurrence.core.CooccurrenceConstants;
import be.ac.vub.bsb.cooccurrence.measures.Matrix;
import be.ac.vub.bsb.cooccurrence.measures.MatrixToolsProvider;
import be.ac.vub.bsb.cooccurrence.measures.NaNTreatment;
import be.ac.vub.bsb.cooccurrence.measures.NaNTreatmentProvider;
import be.ac.vub.bsb.cooccurrence.measures.StatsProvider;
import be.ac.vub.bsb.cooccurrence.util.ArrayTools;
import cern.colt.matrix.DoubleMatrix1D;
import com.amazonaws.services.s3.model.InstructionFileId;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import jsc.correlation.KendallCorrelation;
import jsc.datastructures.PairedData;
import org.apache.commons.math3.stat.correlation.SpearmansCorrelation;

/* loaded from: input_file:be/ac/vub/bsb/parsers/edwards/PhageBacSimMatGenerator.class */
/**
 * Computes a cross-correlation matrix between the rows of a phage matrix and the rows of a
 * host matrix.
 *
 * <p>Rows are first filtered by the configured taxon level, then every selected phage row is
 * compared with every selected host row using the configured measure. The result has one row
 * per selected phage and one column per selected host and is available through
 * {@link #getCrossCorrelMatrix()} after {@link #computeCrossCorrelations()} has run.
 */
public class PhageBacSimMatGenerator {
    /**
     * Minimum number of entries a pair of vectors must have for a measure to be computed;
     * shorter vectors yield {@code NaN}. Also handed to the NaN treatment provider as the
     * required number of NaN-free entries when missing values are treated.
     */
    public static Integer MISSING_VALUE_FREE_PAIRS = 50;
    /** Regular expression a row name must fully match to be considered an OTU row. */
    public static String OTU_MATCHER = "^(OTU_NC_|NC_)[\\d]{6}";
    /** Name of the taxon level that selects OTU rows. */
    public static String OTU_LEVEL = "otu";
    public static String DEFAULT_TAXON_LEVEL = OTU_LEVEL;
    public static String DEFAULT_MEASURE = CooccurrenceConstants.PEARSON;
    /** Measure name for the hypergeometric co-presence p-value (only available with {@code noJSL}). */
    public static String HYPERGEOM = "hypergeom";

    /** Measure name for mutual information (only available without {@code noJSL}). */
    private static final String MUTUAL_INFORMATION = "mutInfo";
    /** Suffix removed from phage row names before they are looked up in the metadata. */
    private static final String PHAGE_SUFFIX = "_phage";
    /**
     * Metadata annotation names that are treated as taxon levels.
     * NOTE(review): OptionNames.errorDistribution and PathwayinferenceConstants.ORDER look like
     * decompiler substitutions for the literals "family" and "order" - confirm their values.
     */
    private static final String[] TAXON_LEVEL_ANNOTATIONS = {
        "species", "genus", OptionNames.errorDistribution, PathwayinferenceConstants.ORDER, "class", "phylum"
    };

    private Matrix _phageMatrix = new Matrix();
    private String _phageMetadataLocation = "";
    private Matrix _hostMatrix = new Matrix();
    private String _hostMetadataLocation = "";
    private Matrix _crossCorrelMatrix = new Matrix();
    private String _taxonLevel = OTU_LEVEL;
    private String _measure = DEFAULT_MEASURE;
    private boolean _noJSL = false;
    private boolean _replaceZeroByNaN = false;
    private boolean _treatMissingValues = false;
    // Not read anywhere in this class.
    private boolean _convertToBinary = false;
    // Only initialised by computeCrossCorrelations() when the measure is mutual information.
    private Discretizer _discretizer;

    /**
     * Applies the missing-value treatment to a pair of vectors when it is enabled.
     *
     * @return a two-element array holding the (possibly treated) first and second vector
     */
    private DoubleMatrix1D[] prepareVectors(DoubleMatrix1D first, DoubleMatrix1D second) {
        if (!isTreatMissingValues()) {
            return new DoubleMatrix1D[] {first, second};
        }
        // Kept from the original: the provider is obtained before the static treatment call.
        NaNTreatmentProvider.getInstance();
        List<Object> treated = NaNTreatment.treatMissingValuesInVectors(first, second);
        return new DoubleMatrix1D[] {(DoubleMatrix1D) treated.get(0), (DoubleMatrix1D) treated.get(1)};
    }

    /** Stacks two vectors as row 0 and row 1 of a new two-row matrix. */
    private static Matrix toTwoRowMatrix(DoubleMatrix1D first, DoubleMatrix1D second) {
        Matrix pair = new Matrix(2, first.size());
        pair.setRow(0, first.toArray());
        pair.setRow(1, second.toArray());
        return pair;
    }

    /**
     * Computes the configured measure for two vectors with the {@code be.ac.ulb.mlg.utils}
     * measure implementations.
     *
     * @param doubleMatrix1D first vector
     * @param doubleMatrix1D2 second vector
     * @return the measure value, or {@code NaN} when the (treated) first vector has fewer than
     *     {@link #MISSING_VALUE_FREE_PAIRS} entries or the measure is not supported here
     */
    private double getValueWithJSL(DoubleMatrix1D doubleMatrix1D, DoubleMatrix1D doubleMatrix1D2) {
        DoubleMatrix1D[] vectors = prepareVectors(doubleMatrix1D, doubleMatrix1D2);
        DoubleMatrix1D first = vectors[0];
        DoubleMatrix1D second = vectors[1];
        if (first.size() < MISSING_VALUE_FREE_PAIRS.intValue()) {
            return Double.NaN;
        }
        String measure = getMeasure();
        double[] x = first.toArray();
        double[] y = second.toArray();
        if (measure.equals(CooccurrenceConstants.PEARSON)) {
            return new Pearson().measure(x, y, null);
        }
        if (measure.equals(CooccurrenceConstants.SPEARMAN)) {
            return new Spearman().measure(x, y, null);
        }
        if (measure.equals(CooccurrenceConstants.KENDALL)) {
            return new Kendall().measure(x, y, null);
        }
        if (measure.equals(CooccurrenceConstants.KLD)) {
            return new KullbackLeibler().measure(x, y, null);
        }
        if (measure.equals(MUTUAL_INFORMATION)) {
            return new MutualInformation(this._discretizer).measure(x, y, null);
        }
        if (measure.equals(CooccurrenceConstants.BRAY_CURTIS)) {
            return new BrayCurtis().measure(x, y, null);
        }
        if (measure.equals(CooccurrenceConstants.EUCLID)) {
            return new Euclidean().measure(x, y, null);
        }
        if (measure.equals(CooccurrenceConstants.HELLINGER)) {
            return new Hellinger().measure(x, y, null);
        }
        if (measure.equals(CooccurrenceConstants.DISTANCE_CORREL)) {
            return new BrownCorrelation().measure(x, y, null);
        }
        System.err.println("Measure " + measure + " not supported!");
        return Double.NaN;
    }

    /**
     * Computes the configured measure for two vectors with the JSC, commons-math and
     * {@code MatrixToolsProvider} implementations (used when {@code noJSL} is set).
     *
     * <p>Measure names are compared with {@code equals}, consistent with
     * {@link #getValueWithJSL(DoubleMatrix1D, DoubleMatrix1D)}.
     *
     * @param doubleMatrix1D first vector
     * @param doubleMatrix1D2 second vector
     * @return the measure value, or {@code NaN} when the (treated) first vector has fewer than
     *     {@link #MISSING_VALUE_FREE_PAIRS} entries or the measure is not supported here
     */
    private double getValueWithHome(DoubleMatrix1D doubleMatrix1D, DoubleMatrix1D doubleMatrix1D2) {
        DoubleMatrix1D[] vectors = prepareVectors(doubleMatrix1D, doubleMatrix1D2);
        DoubleMatrix1D first = vectors[0];
        DoubleMatrix1D second = vectors[1];
        if (first.size() < MISSING_VALUE_FREE_PAIRS.intValue()) {
            return Double.NaN;
        }
        String measure = getMeasure();
        if (measure.equals(CooccurrenceConstants.PEARSON)) {
            return MatrixToolsProvider.getPearsonUsingJSC(first, second, false);
        }
        if (measure.equals(CooccurrenceConstants.KENDALL)) {
            return new KendallCorrelation(new PairedData(first.toArray(), second.toArray())).getTestStatistic();
        }
        if (measure.equals(CooccurrenceConstants.SPEARMAN)) {
            return new SpearmansCorrelation().correlation(first.toArray(), second.toArray());
        }
        // The matrix-based measures return a pairwise matrix; cell (0, 1) is first-versus-second.
        if (measure.equals(CooccurrenceConstants.BRAY_CURTIS)) {
            return MatrixToolsProvider.getBrayCurtisDistance(toTwoRowMatrix(first, second)).getMatrix().get(0, 1);
        }
        if (measure.equals(CooccurrenceConstants.KLD)) {
            return MatrixToolsProvider.getKullbackLeibler(toTwoRowMatrix(first, second), MatrixToolsProvider.DEFAULT_LOG_BASIS.doubleValue(), MatrixToolsProvider.DEFAULT_PSEUDO_COUNT.doubleValue(), false).getMatrix().get(0, 1);
        }
        if (measure.equals(CooccurrenceConstants.EUCLID)) {
            return MatrixToolsProvider.getEuclideanDistance(toTwoRowMatrix(first, second)).getMatrix().get(0, 1);
        }
        if (measure.equals(CooccurrenceConstants.HELLINGER)) {
            return MatrixToolsProvider.getHellinger(toTwoRowMatrix(first, second)).getMatrix().get(0, 1);
        }
        if (measure.equals(CooccurrenceConstants.DISTANCE_CORREL)) {
            return MatrixToolsProvider.getDistanceCorrelation(toTwoRowMatrix(first, second)).getMatrix().get(0, 1);
        }
        if (measure.equals(HYPERGEOM)) {
            // Count entries equal to 1 in each vector and in both at the same position.
            int bothCount = 0;
            int firstCount = 0;
            int secondCount = 0;
            int total = first.size();
            for (int i = 0; i < total; i++) {
                boolean inFirst = first.get(i) == 1.0d;
                boolean inSecond = second.get(i) == 1.0d;
                if (inFirst) {
                    firstCount++;
                }
                if (inSecond) {
                    secondCount++;
                }
                if (inFirst && inSecond) {
                    bothCount++;
                }
            }
            return StatsProvider.getPValWithHypergeometricDistribForCopresenceUsingJSC(bothCount, firstCount, secondCount, total);
        }
        System.err.println("Measure " + measure + " not supported!");
        return Double.NaN;
    }

    /**
     * Reads the row metadata of a matrix (when a location is configured) and adds every
     * taxon-level annotation value to the name set of its level.
     *
     * @param matrix matrix whose row metadata is loaded; it is modified by the read
     * @param metadataLocation metadata file location; nothing happens when it is empty
     * @param attributes metadata attribute names to read
     * @param attributeTypes class name of each attribute, parallel to {@code attributes}
     * @param namesByLevel target sets keyed by taxon-level annotation name
     */
    private static void collectTaxonNames(Matrix matrix, String metadataLocation, List<String> attributes, ArrayList<String> attributeTypes, Map<String, Set<String>> namesByLevel) {
        if (metadataLocation.isEmpty()) {
            return;
        }
        matrix.readRowMetaData(metadataLocation, attributes, attributeTypes);
        for (String element : matrix.getRowMetaData().getElements()) {
            for (String annotation : matrix.getRowMetaData().getAnnotations(element)) {
                Set<String> names = namesByLevel.get(annotation);
                if (names == null) {
                    // Not one of the taxon-level annotations.
                    continue;
                }
                Object value = matrix.getRowMetaData().getAnnotation(element, annotation);
                if (value != null) {
                    names.add(value.toString());
                }
            }
        }
    }

    /**
     * Selects the row names of a matrix that belong to the configured taxon level.
     *
     * <p>A row whose lookup name fully matches {@link #OTU_MATCHER} is selected only on the OTU
     * level; any other row is selected when its lookup name is among the metadata values
     * collected for the configured level.
     *
     * @param matrix matrix whose rows are filtered
     * @param suffixToStrip text removed from a row name to obtain the lookup name, or
     *     {@code null} to look up the row name unchanged
     * @param namesByLevel taxon names collected from the metadata, keyed by level
     * @return the selected (unmodified) row names in matrix order
     */
    private ArrayList<String> selectRowsOnTaxonLevel(Matrix matrix, String suffixToStrip, Map<String, Set<String>> namesByLevel) {
        String taxonLevel = getTaxonLevel();
        boolean onOtuLevel = OTU_LEVEL.equals(taxonLevel);
        Set<String> namesOnLevel = namesByLevel.get(taxonLevel);
        ArrayList<String> selected = new ArrayList<String>();
        for (int row = 0; row < matrix.getMatrix().rows(); row++) {
            String rowName = matrix.getRowName(row);
            String lookupName = suffixToStrip == null ? rowName : rowName.replace(suffixToStrip, "");
            boolean keep;
            if (lookupName.matches(OTU_MATCHER)) {
                keep = onOtuLevel;
            } else {
                keep = namesOnLevel != null && namesOnLevel.contains(lookupName);
            }
            if (keep) {
                selected.add(rowName);
            }
        }
        return selected;
    }

    /**
     * Fills the cross-correlation matrix from the current phage and host matrices.
     *
     * <p>Steps: configure the NaN treatment, load taxon names from the metadata files,
     * optionally replace zeros by missing values, select the rows on the configured taxon
     * level, apply measure-specific preprocessing and finally compute the measure for every
     * phage/host row pair.
     *
     * @throws IllegalArgumentException if the selected phage and host sub-matrices have a
     *     different number of columns
     */
    public void computeCrossCorrelations() {
        if (isTreatMissingValues()) {
            NaNTreatmentProvider.getInstance().setTreatmentStrategy(NaNTreatment.PAIRWISE_NA_OMIT);
            NaNTreatmentProvider.getInstance().setRequiredNaNFreeNumber(MISSING_VALUE_FREE_PAIRS.intValue());
        } else {
            NaNTreatmentProvider.getInstance().setTreatmentStrategy(NaNTreatment.NO_TREATMENT);
        }

        // Every metadata attribute is read as a string.
        List<String> attributes = DiverseTools.stringToList(PhageBacPreprocessor.METADATA_ATTRIBS, "/");
        ArrayList<String> attributeTypes = new ArrayList<String>();
        for (int i = 0; i < attributes.size(); i++) {
            attributeTypes.add("java.lang.String");
        }

        // Host and phage metadata feed the same per-level name sets.
        Map<String, Set<String>> namesByLevel = new HashMap<String, Set<String>>();
        for (String level : TAXON_LEVEL_ANNOTATIONS) {
            if (!namesByLevel.containsKey(level)) {
                namesByLevel.put(level, new HashSet<String>());
            }
        }
        collectTaxonNames(this._hostMatrix, this._hostMetadataLocation, attributes, attributeTypes, namesByLevel);
        collectTaxonNames(this._phageMatrix, this._phageMetadataLocation, attributes, attributeTypes, namesByLevel);

        if (isReplaceZeroByNaN()) {
            this._hostMatrix = PhageBacPreprocessor.replaceZeroByMissingValue(this._hostMatrix);
            this._phageMatrix = PhageBacPreprocessor.replaceZeroByMissingValue(this._phageMatrix);
        }
        if (getMeasure().equals(MUTUAL_INFORMATION)) {
            System.out.println("Preparing discretizer using equal frequency...");
            // Number of intervals is the rounded square root of the column count.
            int intervals = (int) Math.rint(Math.sqrt(this._phageMatrix.getMatrix().columns()));
            MatrixToolsProvider.logger.info("Discretizing into " + intervals + " intervals...");
            this._discretizer = new UniformFrequencyDiscretizer(Discretizer.Mode.ROW_WISE, intervals);
        }

        ArrayList<String> phageNames = selectRowsOnTaxonLevel(getPhageMatrix(), PHAGE_SUFFIX, namesByLevel);
        ArrayList<String> hostNames = selectRowsOnTaxonLevel(getHostMatrix(), null, namesByLevel);
        Matrix phages = MatrixToolsProvider.getSubMatrix(getPhageMatrix(), phageNames);
        Matrix hosts = MatrixToolsProvider.getSubMatrix(getHostMatrix(), hostNames);
        if (phages.getMatrix().columns() != hosts.getMatrix().columns()) {
            throw new IllegalArgumentException("Phage and host matrix have different column numbers!");
        }

        String measure = getMeasure();
        if (measure.equals(CooccurrenceConstants.KLD)) {
            System.out.println("Replacing zeros by pseudocounts (" + MatrixToolsProvider.DEFAULT_PSEUDO_COUNT + ") and standardizing matrix row-wise...");
            phages = MatrixToolsProvider.getConditionalProbabMatrix(MatrixToolsProvider.addPseudoCountToZeroEntries(phages, MatrixToolsProvider.DEFAULT_PSEUDO_COUNT.doubleValue()));
            hosts = MatrixToolsProvider.getConditionalProbabMatrix(MatrixToolsProvider.addPseudoCountToZeroEntries(hosts, MatrixToolsProvider.DEFAULT_PSEUDO_COUNT.doubleValue()));
        }
        if (measure.equals(CooccurrenceConstants.BRAY_CURTIS) || measure.equals(CooccurrenceConstants.EUCLID) || measure.equals(CooccurrenceConstants.HELLINGER)) {
            System.out.println("Standardizing matrix row-wise...");
            phages = MatrixToolsProvider.getConditionalProbabMatrix(phages);
            hosts = MatrixToolsProvider.getConditionalProbabMatrix(hosts);
        }
        System.out.println("Obtained " + phageNames.size() + " phages and " + hostNames.size() + " hosts on level " + getTaxonLevel() + ".");

        setCrossCorrelMatrix(new Matrix(phageNames.size(), hostNames.size()));
        getCrossCorrelMatrix().setRowNames(ArrayTools.m280toArray(phageNames));
        getCrossCorrelMatrix().setColNames(ArrayTools.m280toArray(hostNames));
        boolean useHomeImplementation = isNoJSL();
        for (int phageRow = 0; phageRow < phages.getMatrix().rows(); phageRow++) {
            DoubleMatrix1D phageVector = phages.getMatrix().viewRow(phageRow);
            for (int hostRow = 0; hostRow < hosts.getMatrix().rows(); hostRow++) {
                DoubleMatrix1D hostVector = hosts.getMatrix().viewRow(hostRow);
                double value = useHomeImplementation
                        ? getValueWithHome(phageVector, hostVector)
                        : getValueWithJSL(phageVector, hostVector);
                getCrossCorrelMatrix().getMatrix().set(phageRow, hostRow, value);
            }
        }
    }

    /** Returns the name of the measure used to compare phage and host rows. */
    public String getMeasure() {
        return this._measure;
    }

    /** Sets the name of the measure used to compare phage and host rows. */
    public void setMeasure(String str) {
        this._measure = str;
    }

    /** Returns the phage matrix. */
    public Matrix getPhageMatrix() {
        return this._phageMatrix;
    }

    /** Sets the phage matrix. */
    public void setPhageMatrix(Matrix matrix) {
        this._phageMatrix = matrix;
    }

    /** Returns the host matrix. */
    public Matrix getHostMatrix() {
        return this._hostMatrix;
    }

    /** Sets the host matrix. */
    public void setHostMatrix(Matrix matrix) {
        this._hostMatrix = matrix;
    }

    /** Returns the cross-correlation matrix (selected phages as rows, selected hosts as columns). */
    public Matrix getCrossCorrelMatrix() {
        return this._crossCorrelMatrix;
    }

    private void setCrossCorrelMatrix(Matrix matrix) {
        this._crossCorrelMatrix = matrix;
    }

    /** Returns the phage metadata location; empty means no phage metadata is read. */
    public String getPhageMetadataLocation() {
        return this._phageMetadataLocation;
    }

    /** Sets the phage metadata location; empty means no phage metadata is read. */
    public void setPhageMetadataLocation(String str) {
        this._phageMetadataLocation = str;
    }

    /** Returns the host metadata location; empty means no host metadata is read. */
    public String getHostMetadataLocation() {
        return this._hostMetadataLocation;
    }

    /** Sets the host metadata location; empty means no host metadata is read. */
    public void setHostMetadataLocation(String str) {
        this._hostMetadataLocation = str;
    }

    /** Returns the taxon level on which rows are selected. */
    public String getTaxonLevel() {
        return this._taxonLevel;
    }

    /** Sets the taxon level on which rows are selected. */
    public void setTaxonLevel(String str) {
        this._taxonLevel = str;
    }

    /** Returns whether the JSC/commons-math/MatrixToolsProvider implementations are used. */
    public boolean isNoJSL() {
        return this._noJSL;
    }

    /** Sets whether the JSC/commons-math/MatrixToolsProvider implementations are used. */
    public void setNoJSL(boolean z) {
        this._noJSL = z;
    }

    /** Returns whether missing values are omitted pairwise before a measure is computed. */
    public boolean isTreatMissingValues() {
        return this._treatMissingValues;
    }

    /** Sets whether missing values are omitted pairwise before a measure is computed. */
    public void setTreatMissingValues(boolean z) {
        this._treatMissingValues = z;
    }

    /** Returns whether zeros are replaced by missing values before the computation. */
    public boolean isReplaceZeroByNaN() {
        return this._replaceZeroByNaN;
    }

    /** Sets whether zeros are replaced by missing values before the computation. */
    public void setReplaceZeroByNaN(boolean z) {
        this._replaceZeroByNaN = z;
    }

    /**
     * Development driver: reads fixed host and phage input files and writes one Kendall
     * similarity matrix per taxon level into the working directory.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        Matrix hostMatrix = new Matrix();
        hostMatrix.readMatrix("/Users/u0097353/Documents/Documents_Karoline/BSB_Lab/Collaborations/Rob_Edwards/Input_all/bacteria_hits_minocc1-filtered-lineages_processed.txt", false);
        System.out.println("Host matrix read in");
        Matrix phageMatrix = new Matrix();
        phageMatrix.readMatrix("/Users/u0097353/Documents/Documents_Karoline/BSB_Lab/Collaborations/Rob_Edwards/Input_all/phage_hits_minocc1-filtered-lineages_processed.txt", false);
        System.out.println("Phage matrix read in");
        for (String taxonLevel : DiverseTools.stringToList("phylum/class/order/family/genus/species/otu", "/")) {
            PhageBacSimMatGenerator generator = new PhageBacSimMatGenerator();
            generator.setHostMatrix(hostMatrix);
            generator.setHostMetadataLocation("/Users/u0097353/Documents/Documents_Karoline/BSB_Lab/Collaborations/Rob_Edwards/Input_all/bacteria_hits_minocc1-filtered-lineages-metadata.txt");
            generator.setPhageMatrix(phageMatrix);
            generator.setPhageMetadataLocation("/Users/u0097353/Documents/Documents_Karoline/BSB_Lab/Collaborations/Rob_Edwards/Input_all/phage_hits_minocc1-filtered-lineages-metadata.txt");
            generator.setMeasure(CooccurrenceConstants.KENDALL);
            generator.setTaxonLevel(taxonLevel);
            generator.setNoJSL(true);
            generator.setTreatMissingValues(true);
            generator.setReplaceZeroByNaN(true);
            generator.computeCrossCorrelations();
            String outputName = "phageHostSim_" + generator.getMeasure() + PathwayinferenceConstants.REACTION_SUBREACTION_JOINER + generator.getTaxonLevel() + ".txt";
            generator.getCrossCorrelMatrix().writeMatrix(outputName, "\t", true, true);
        }
    }
}
