package be.ac.vub.bsb.cooccurrence.util;

import be.ac.ulb.bigre.pathwayinference.core.util.DiverseTools;
import be.ac.ulb.scmbb.snow.graph.core.GraphDataLinker;
import be.ac.ulb.scmbb.snow.graph.core.Node;
import be.ac.vub.bsb.cooccurrence.analysis.UnifracWrapper;
import be.ac.vub.bsb.cooccurrence.cmd.CooccurrenceAnalyser;
import be.ac.vub.bsb.cooccurrence.conversion.MatrixFilterer;
import be.ac.vub.bsb.cooccurrence.conversion.NetworkFilterer;
import be.ac.vub.bsb.cooccurrence.core.CooccurrenceConstants;
import be.ac.vub.bsb.cooccurrence.measures.Matrix;
import be.ac.vub.bsb.cooccurrence.measures.MatrixToolsProvider;
import be.ac.vub.bsb.parsers.util.BiomTableParser;
import cern.colt.matrix.DoubleMatrix1D;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.log4j.Logger;

/* loaded from: input_file:be/ac/vub/bsb/cooccurrence/util/HigherLevelTaxaAssigner.class */
/**
 * Extends a matrix with one additional row per higher-level taxon.
 *
 * <p>Every row of the input matrix is expected to carry a lineage annotation (a separator-joined
 * list of taxon names, stored under {@link #getLineageAttrib()}). For each taxon that occurs at a
 * non-terminal position of some lineage, the rows whose lineage contains that taxon are summed and
 * the sum is appended as a new row. If a group attribute is set, this is done separately for each
 * distinct value of that attribute.
 *
 * <p>Instances keep mutable state between calls and are not written with concurrent use in mind.
 */
public class HigherLevelTaxaAssigner implements IMethod {
    /** If true, rows for the root taxa named below are not added. */
    public static boolean OMIT_ROOT = true;
    public static String BACTERIA_ROOT = "bacteria";
    public static String ARCHAEA_ROOT = "archaea";
    public static String EUKARYA_ROOT = "eukarya";
    public static String EUKARYA_ROOT_ALT = "eukaryota";

    private String _lineageAttrib = "";
    private String _groupAttrib = "";
    private boolean _standardQiimeLineage = false;
    private Matrix _inputMatrix = new Matrix();
    private Matrix _extendedMatrix = new Matrix();
    private boolean _twoLevelRowNames = false;
    /** Taxon name -> lineage from the first lineage part up to and including that taxon (first seen wins). */
    private Map<String, String> _higherLevelTaxonVsLineage = new HashMap<String, String>();
    /** Group currently being processed; empty when no grouping is applied. */
    private String _currentGroup = "";
    private int _addedRowNumber = 0;
    private boolean _higherLevelTaxaAssigned = false;
    protected Logger _logger = Logger.getLogger(getClass().getPackage().toString());

    /**
     * Splits {@code text} at every literal occurrence of {@code separator}.
     *
     * <p>{@link String#split(String)} interprets its argument as a regular expression, whereas the
     * separators are used literally everywhere else in this class (concatenation,
     * {@code contains}); quoting keeps both views consistent. Trailing empty parts are dropped,
     * exactly as with a plain {@code split}.
     */
    private static String[] splitLiteral(String text, String separator) {
        return text.split(java.util.regex.Pattern.quote(separator));
    }

    /** Tells whether {@code taxon} is one of the configured root names and roots are to be omitted. */
    private static boolean isOmittedRoot(String taxon) {
        // equalsIgnoreCase is locale-independent, unlike toLowerCase() with the default locale.
        return OMIT_ROOT
                && (taxon.equalsIgnoreCase(BACTERIA_ROOT)
                        || taxon.equalsIgnoreCase(ARCHAEA_ROOT)
                        || taxon.equalsIgnoreCase(EUKARYA_ROOT)
                        || taxon.equalsIgnoreCase(EUKARYA_ROOT_ALT));
    }

    /**
     * Remembers the lineage of the taxon at position {@code i}, i.e. the parts {@code 0..i} joined
     * by the lineage separator. A taxon that already has a stored lineage is left untouched.
     *
     * @param strArr lineage split into its parts
     * @param i      index of the taxon within {@code strArr}
     */
    private void storeLineage(String[] strArr, int i) {
        String taxon = strArr[i];
        if (this._higherLevelTaxonVsLineage.containsKey(taxon)) {
            return;
        }
        StringBuilder lineage = new StringBuilder();
        for (int level = 0; level <= i; level++) {
            if (level > 0) {
                lineage.append(CooccurrenceAnalyser.LINEAGE_SEPARATOR);
            }
            lineage.append(strArr[level]);
        }
        this._higherLevelTaxonVsLineage.put(taxon, lineage.toString());
    }

    /**
     * Collects the names of all taxa that occur at a non-terminal position in the lineage of some
     * row, and stores their lineages as a side effect. Rows without a lineage are reported and
     * skipped.
     *
     * @param matrix matrix whose row lineages are scanned
     * @return sorted set of higher-level taxon names
     */
    private Set<String> collectHigherLevelTaxa(Matrix matrix) {
        Set<String> taxa = new TreeSet<String>();
        for (int row = 0; row < matrix.getMatrix().rows(); row++) {
            if (!matrix.hasRowMetaAnnotation(row, getLineageAttrib())) {
                this._logger.warn("Row " + matrix.getRowName(row) + " with index " + row + " does not have a lineage assigned.");
                continue;
            }
            String[] parts = splitLiteral(matrix.getRowMetaAnnotation(row, getLineageAttrib()).toString(), CooccurrenceAnalyser.LINEAGE_SEPARATOR);
            // The last part is the row's own taxon, so only the parts before it are higher-level.
            for (int level = 0; level < parts.length - 1; level++) {
                taxa.add(parts[level]);
                storeLineage(parts, level);
            }
        }
        this._logger.debug("Higher-level taxa: " + taxa);
        return taxa;
    }

    /**
     * Returns the indices of the rows whose lineage contains {@code str}. Rows whose name contains
     * {@link MatrixFilterer#NAME_SUMMED_FILTERED_NON_FEAT_ROW} are ignored. Each row is reported at
     * most once, even if the taxon name occurs several times in its lineage.
     *
     * @param matrix matrix to search
     * @param str    taxon name to look for
     * @return row indices in ascending order
     */
    private List<Integer> getMemberTaxaIndices(Matrix matrix, String str) {
        List<Integer> members = new ArrayList<Integer>();
        for (int row = 0; row < matrix.getMatrix().rows(); row++) {
            if (matrix.getRowName(row).contains(MatrixFilterer.NAME_SUMMED_FILTERED_NON_FEAT_ROW)) {
                continue;
            }
            if (!matrix.hasRowMetaAnnotation(row, getLineageAttrib())) {
                // The index refers to the given matrix, so the name must be read from it as well.
                this._logger.warn("Row " + matrix.getRowName(row) + " does not have a lineage assigned.");
                continue;
            }
            String[] parts = splitLiteral(matrix.getRowMetaAnnotation(row, getLineageAttrib()).toString(), CooccurrenceAnalyser.LINEAGE_SEPARATOR);
            for (String part : parts) {
                if (part.equals(str)) {
                    members.add(Integer.valueOf(row));
                    // Stop at the first hit so that a repeated name does not count the row twice.
                    break;
                }
            }
        }
        return members;
    }

    /**
     * Sums, for every given taxon, the rows that belong to it.
     *
     * @param matrix matrix providing the rows
     * @param set    higher-level taxon names
     * @return taxon name -> summed row; taxa without any member row are left out
     */
    private Map<String, DoubleMatrix1D> sumMemberTaxa(Matrix matrix, Set<String> set) {
        Map<String, DoubleMatrix1D> sums = new HashMap<String, DoubleMatrix1D>();
        for (String taxon : set) {
            List<Integer> members = getMemberTaxaIndices(matrix, taxon);
            if (members.isEmpty()) {
                this._logger.warn("No member rows found for higher-level taxon " + taxon + "; skipping it.");
                continue;
            }
            // The accumulator is local to the taxon so that no sum leaks into the next taxon.
            DoubleMatrix1D sum = null;
            for (int m = 0; m < members.size(); m++) {
                DoubleMatrix1D row = matrix.getMatrix().viewRow(members.get(m).intValue());
                sum = (m == 0) ? row : VectorToolsProvider.sumVectorsIgnoringNaNs(row, sum);
            }
            sums.put(taxon, sum);
        }
        return sums;
    }

    /**
     * Builds a row name from the last two lineage levels (or the single level, if there is only
     * one); falls back to the taxon name when the lineage is empty. The current group is appended
     * when a group attribute is set.
     *
     * <p>NOTE(review): {@link #rowMapToMatrix} appends the group suffix again to the value returned
     * here, so two-level names end up with the suffix twice. This looks unintended but is kept
     * because downstream consumers of the row names are not visible here - confirm before changing.
     *
     * @param str  taxon name
     * @param str2 lineage of the taxon, possibly empty
     * @return row name
     */
    private String getTwoLevelRowName(String str, String str2) {
        String name;
        if (str2.isEmpty()) {
            name = str;
        } else {
            List<String> levels = DiverseTools.stringToList(str2, CooccurrenceAnalyser.LINEAGE_SEPARATOR);
            int count = levels.size();
            if (count > 1) {
                name = String.valueOf(levels.get(count - 2)) + CooccurrenceAnalyser.LINEAGE_SEPARATOR + levels.get(count - 1);
            } else {
                name = levels.get(0);
            }
        }
        if (!getGroupAttrib().isEmpty()) {
            name = name + CooccurrenceAnalyser.BODYSITE_SEPARATOR + this._currentGroup;
        }
        return name;
    }

    /**
     * Converts summed rows into a matrix with annotated rows. Column names and column metadata are
     * taken over from {@code matrix}. Root taxa are left out if {@link #OMIT_ROOT} is set. The
     * number of rows of the result is added to the added-row counter.
     *
     * @param map    taxon name -> summed row
     * @param matrix matrix supplying column names and column metadata
     * @return matrix of higher-level taxon rows; an empty matrix if {@code map} is empty
     */
    private Matrix rowMapToMatrix(Map<String, DoubleMatrix1D> map, Matrix matrix) {
        if (map.isEmpty()) {
            this._logger.error("Could not assign any higher-level taxa!");
            return new Matrix();
        }
        int omitted = 0;
        for (String taxon : map.keySet()) {
            if (isOmittedRoot(taxon)) {
                omitted++;
            }
        }
        Matrix result = new Matrix(map.size() - omitted, map.values().iterator().next().size());
        result.setColNames(matrix.getColNames());
        result.setColMetaData(matrix.getColMetaData());
        int rowIndex = 0;
        for (Map.Entry<String, DoubleMatrix1D> entry : map.entrySet()) {
            String taxon = entry.getKey();
            if (isOmittedRoot(taxon)) {
                this._logger.info("Omitting root " + taxon);
                continue;
            }
            String lineage;
            if (this._higherLevelTaxonVsLineage.containsKey(taxon)) {
                lineage = this._higherLevelTaxonVsLineage.get(taxon);
            } else {
                this._logger.warn("Did not obtain a lineage for higher-level taxon " + taxon + "!");
                lineage = "";
            }
            String rowName = isTwoLevelRowNames() ? getTwoLevelRowName(taxon, lineage) : taxon;
            if (!this._groupAttrib.isEmpty()) {
                rowName = rowName + CooccurrenceAnalyser.BODYSITE_SEPARATOR + this._currentGroup;
            }
            result.setRowName(rowIndex, rowName);
            result.setRow(rowIndex, entry.getValue().toArray());
            result.setRowMetaAnnotation(rowIndex, getLineageAttrib(), lineage);
            result.setRowMetaAnnotation(rowIndex, CooccurrenceConstants.TAXON_ATTRIBUTE, taxon);
            result.setRowMetaAnnotation(rowIndex, FeatureMatrixLoader.IS_FEATURE_ATTRIB, FeatureMatrixLoader.FEATURE_ABSENT_VALUE);
            if (isStandardQiimeLineage()) {
                // One annotation per standard level; levels beyond the lineage depth get "none".
                String[] levelNames = BiomTableParser.STANDARD_TAXONOMIC_LEVELS;
                String[] levels = splitLiteral(lineage, CooccurrenceAnalyser.LINEAGE_SEPARATOR);
                for (int l = 0; l < levelNames.length; l++) {
                    result.setRowMetaAnnotation(rowIndex, levelNames[l], l < levels.length ? levels[l] : "none");
                }
            }
            if (!this._currentGroup.isEmpty()) {
                result.setRowMetaAnnotation(rowIndex, getGroupAttrib(), this._currentGroup);
            }
            rowIndex++;
        }
        this._addedRowNumber += result.getMatrix().rows();
        return result;
    }

    /** Returns {@code matrix} with its higher-level taxon rows appended. */
    private Matrix extendWithHigherLevelTaxa(Matrix matrix) {
        Matrix higherLevelRows = rowMapToMatrix(sumMemberTaxa(matrix, collectHigherLevelTaxa(matrix)), matrix);
        return MatrixToolsProvider.mergeMatricesRowWise(matrix, higherLevelRows);
    }

    /**
     * Computes the extended matrix from the input matrix.
     *
     * <p>Feature rows and rows whose name contains
     * {@link MatrixFilterer#NAME_SUMMED_FILTERED_NON_FEAT_ROW} are set aside first and re-appended
     * at the end. Rows without a lineage receive a dummy lineage. Note that the stored input
     * matrix is replaced by the version without those set-aside rows.
     */
    public void assignHigherLevelTaxa() {
        this._logger.info("Assigning higher-level taxa rows...");
        Matrix featureSubMatrix = MatrixToolsProvider.getFeatureSubMatrix(getInputMatrix());
        // Raw type kept: the set is handed to project methods whose signatures are not visible here.
        HashSet featureNames = new HashSet();
        featureNames.addAll(ArrayTools.arrayToSet(featureSubMatrix.getRowNames()));
        this._logger.info("Features: " + featureNames.toString());
        setInputMatrix(MatrixToolsProvider.getSubmatrixWithoutRows(getInputMatrix(), featureNames));

        HashSet summedRowNames = new HashSet();
        for (int row = 0; row < getInputMatrix().getMatrix().rows(); row++) {
            String rowName = getInputMatrix().getRowName(row);
            if (rowName.contains(MatrixFilterer.NAME_SUMMED_FILTERED_NON_FEAT_ROW)) {
                summedRowNames.add(rowName);
            }
        }
        Matrix summedRows = new Matrix();
        if (!summedRowNames.isEmpty()) {
            summedRows = MatrixToolsProvider.getSubMatrix(getInputMatrix(), summedRowNames);
            // Direct field access: going through setInputMatrix would only reset the state again.
            this._inputMatrix = MatrixToolsProvider.getSubmatrixWithoutRows(this._inputMatrix, summedRowNames);
        }

        int dummyLineages = 0;
        for (int row = 0; row < getInputMatrix().getMatrix().rows(); row++) {
            if (!getInputMatrix().hasRowMetaAnnotation(row, getLineageAttrib())) {
                this._logger.warn("Setting dummy lineage for row " + getInputMatrix().getRowName(row) + " without a lineage.");
                getInputMatrix().setRowMetaAnnotation(row, this._lineageAttrib, "unclassified" + dummyLineages + CooccurrenceAnalyser.LINEAGE_SEPARATOR + getInputMatrix().getRowName(row));
                dummyLineages++;
            }
        }
        if (dummyLineages > 0) {
            this._logger.warn("Encountered " + dummyLineages + " empty lineages!");
        }

        if (getGroupAttrib().isEmpty()) {
            // Forget a group left over from an earlier grouped run on this instance.
            this._currentGroup = "";
            setExtendedMatrix(extendWithHigherLevelTaxa(getInputMatrix()));
        } else {
            // HashSet kept so that groups are processed and merged in the same order as before.
            Set<String> groups = new HashSet<String>();
            for (int row = 0; row < getInputMatrix().getMatrix().rows(); row++) {
                if (getInputMatrix().hasRowMetaAnnotation(row, getGroupAttrib())) {
                    groups.add(getInputMatrix().getRowMetaAnnotation(row, getGroupAttrib()).toString());
                }
            }
            this._logger.info("groups: " + groups.toString());
            if (groups.isEmpty()) {
                this._logger.error("No row carries the group attribute " + getGroupAttrib() + "; no higher-level taxa were assigned.");
                setExtendedMatrix(getInputMatrix());
            } else {
                Map<String, Matrix> extendedPerGroup = new TreeMap<String, Matrix>();
                for (String group : groups) {
                    this._logger.info("Processing group " + group);
                    this._currentGroup = group;
                    Matrix groupMatrix = MatrixToolsProvider.getSubMatrixForGivenAttributeValue(getInputMatrix(), getGroupAttrib(), group);
                    extendedPerGroup.put(group, extendWithHigherLevelTaxa(groupMatrix));
                }
                // Start with the alphabetically first group, then append the others.
                String firstGroup = extendedPerGroup.keySet().iterator().next();
                Matrix merged = extendedPerGroup.get(firstGroup);
                for (String group : groups) {
                    if (!group.equals(firstGroup)) {
                        merged = MatrixToolsProvider.mergeMatricesRowWise(merged, extendedPerGroup.get(group));
                    }
                }
                setExtendedMatrix(merged);
            }
        }

        if (!featureSubMatrix.isEmpty()) {
            setExtendedMatrix(MatrixToolsProvider.mergeMatricesRowWise(getExtendedMatrix(), featureSubMatrix));
        }
        if (!summedRowNames.isEmpty()) {
            setExtendedMatrix(MatrixToolsProvider.mergeMatricesRowWise(this._extendedMatrix, summedRows));
        }
        this._higherLevelTaxaAssigned = true;
    }

    /**
     * Annotates the nodes of a network whose identifier contains the body site separator with body
     * site, lineage, taxon and feature flag (each only if not yet present), then filters the
     * network with a {@link NetworkFilterer} configured to filter phylo-hierarchical links.
     * Higher-level taxa are assigned first if that has not happened yet.
     *
     * @param graphDataLinker network to process
     * @return the filtered network
     */
    public GraphDataLinker postprocessResultNetworkWithHigherTaxa(GraphDataLinker graphDataLinker) {
        if (!this._higherLevelTaxaAssigned) {
            assignHigherLevelTaxa();
        }
        for (Node node : graphDataLinker.getGraph().getNodes()) {
            String id = node.getIdentifier();
            if (!id.contains(CooccurrenceAnalyser.BODYSITE_SEPARATOR)) {
                continue;
            }
            String[] idParts = splitLiteral(id, CooccurrenceAnalyser.BODYSITE_SEPARATOR);
            if (idParts.length < 2) {
                // E.g. an identifier that ends with the separator: there is no body site part.
                this._logger.warn("Could not split node identifier " + id + " into taxon and body site; skipping node.");
                continue;
            }
            String namePart = idParts[0];
            String bodysite = idParts[1];
            String taxon = namePart;
            if (namePart.contains(CooccurrenceAnalyser.LINEAGE_SEPARATOR)) {
                // Two-level name: use the second level unless it is "unclassified".
                String[] levels = splitLiteral(namePart, CooccurrenceAnalyser.LINEAGE_SEPARATOR);
                if (levels.length >= 2) {
                    taxon = levels[1].equals("unclassified") ? levels[0] : levels[1];
                } else if (levels.length == 1) {
                    taxon = levels[0];
                }
            }
            if (!graphDataLinker.hasDataAnnotation(id, CooccurrenceConstants.BODYSITE_ATTRIBUTE)) {
                graphDataLinker.getDatas().get(0).put(id, CooccurrenceConstants.BODYSITE_ATTRIBUTE, bodysite);
            }
            if (!graphDataLinker.hasDataAnnotation(id, CooccurrenceConstants.LINEAGE_ATTRIBUTE)) {
                if (this._higherLevelTaxonVsLineage.containsKey(taxon)) {
                    graphDataLinker.getDatas().get(0).put(id, CooccurrenceConstants.LINEAGE_ATTRIBUTE, this._higherLevelTaxonVsLineage.get(taxon));
                } else {
                    this._logger.warn("No lineage assembled for taxon: " + taxon + "!");
                }
            }
            if (!graphDataLinker.hasDataAnnotation(id, CooccurrenceConstants.TAXON_ATTRIBUTE)) {
                graphDataLinker.getDatas().get(0).put(id, CooccurrenceConstants.TAXON_ATTRIBUTE, taxon);
            }
            if (!graphDataLinker.hasDataAnnotation(id, FeatureMatrixLoader.IS_FEATURE_ATTRIB)) {
                graphDataLinker.getDatas().get(0).put(id, FeatureMatrixLoader.IS_FEATURE_ATTRIB, FeatureMatrixLoader.FEATURE_ABSENT_VALUE);
            }
        }
        NetworkFilterer networkFilterer = new NetworkFilterer();
        networkFilterer.setNetwork(graphDataLinker);
        networkFilterer.setLineageAttrib(CooccurrenceConstants.LINEAGE_ATTRIBUTE);
        networkFilterer.setTaxonAttrib(CooccurrenceConstants.TAXON_ATTRIBUTE);
        networkFilterer.setBodysiteAttrib(CooccurrenceConstants.BODYSITE_ATTRIBUTE);
        networkFilterer.setFilterPhyloHierarchicalLinks(true);
        networkFilterer.filter();
        this._logger.info(networkFilterer.toString());
        return networkFilterer.getFilteredNetwork();
    }

    /** Sets the name of the row annotation that holds the lineage. */
    public void setLineageAttrib(String str) {
        this._lineageAttrib = str;
    }

    /** Returns the name of the row annotation that holds the lineage. */
    public String getLineageAttrib() {
        return this._lineageAttrib;
    }

    /** Tells whether per-level annotations are written for the standard taxonomic levels. */
    public boolean isStandardQiimeLineage() {
        return this._standardQiimeLineage;
    }

    /** Enables or disables per-level annotations for the standard taxonomic levels. */
    public void setStandardQiimeLineage(boolean z) {
        this._standardQiimeLineage = z;
    }

    /** Sets the name of the row annotation used for grouping; empty disables grouping. */
    public void setGroupAttrib(String str) {
        this._groupAttrib = str;
    }

    /** Returns the name of the row annotation used for grouping. */
    public String getGroupAttrib() {
        return this._groupAttrib;
    }

    /** Sets the input matrix and resets the added-row counter and the "assigned" flag. */
    public void setInputMatrix(Matrix matrix) {
        this._inputMatrix = matrix;
        this._addedRowNumber = 0;
        this._higherLevelTaxaAssigned = false;
    }

    /** Returns the current input matrix. */
    public Matrix getInputMatrix() {
        return this._inputMatrix;
    }

    /** Chooses between two-level row names and plain taxon names for the added rows. */
    public void setTwoLevelRowNames(boolean z) {
        this._twoLevelRowNames = z;
    }

    /** Tells whether the added rows get two-level names. */
    public boolean isTwoLevelRowNames() {
        return this._twoLevelRowNames;
    }

    private void setExtendedMatrix(Matrix matrix) {
        this._extendedMatrix = matrix;
    }

    /** Returns the matrix including the higher-level taxon rows. */
    public Matrix getExtendedMatrix() {
        return this._extendedMatrix;
    }

    /**
     * Returns the parameter names of this method.
     *
     * <p>NOTE(review): "BodysiteAttrib" is listed although the matching accessor in this class is
     * named {@code setGroupAttrib}; how the names are consumed is not visible here.
     */
    @Override // be.ac.vub.bsb.cooccurrence.util.IMethod
    public List<String> getParameters() {
        List<String> parameters = new ArrayList<String>();
        parameters.add("LineageAttrib");
        parameters.add("BodysiteAttrib");
        parameters.add("TwoLevelRowNames");
        return parameters;
    }

    /** Returns the extended matrix. */
    @Override // be.ac.vub.bsb.cooccurrence.util.IMethod
    public Object getResult() {
        return this._extendedMatrix;
    }

    /** Runs {@link #assignHigherLevelTaxa()}. */
    @Override // be.ac.vub.bsb.cooccurrence.util.IMethod
    public void run() {
        assignHigherLevelTaxa();
    }

    /** Accepts a {@link Matrix} as input; anything else is reported as an error and ignored. */
    @Override // be.ac.vub.bsb.cooccurrence.util.IMethod
    public void setInput(Object obj) {
        if (obj instanceof Matrix) {
            setInputMatrix((Matrix) obj);
        } else {
            this._logger.error("Input object should be a matrix!");
        }
    }

    /** Returns a multi-line, '#'-prefixed summary of input, result and parameters. */
    @Override // be.ac.vub.bsb.cooccurrence.util.IMethod
    public String toString() {
        StringBuilder report = new StringBuilder();
        report.append("# ").append("Higher-level taxa assigner\n");
        report.append("# Date=").append(new Date().toString()).append("\n");
        report.append("# INPUT\n");
        report.append("# Rows in input matrix=").append(getInputMatrix().getMatrix().rows()).append("\n");
        report.append("# Columns in input matrix=").append(getInputMatrix().getMatrix().columns()).append("\n");
        report.append("# RESULT\n");
        report.append("# Rows in extended matrix=").append(getExtendedMatrix().getMatrix().rows()).append("\n");
        report.append("# Columns in extended matrix=").append(getExtendedMatrix().getMatrix().columns()).append("\n");
        report.append("# Number of added higher-level taxa rows=").append(this._addedRowNumber).append("\n");
        report.append("# PARAMETER\n");
        report.append("# Lineage attribute=").append(getLineageAttrib()).append("\n");
        report.append("# Group attribute=").append(getGroupAttrib()).append("\n");
        report.append("# Standard QIIME lineages=").append(isStandardQiimeLineage()).append("\n");
        report.append("# Two-level row names=").append(isTwoLevelRowNames()).append("\n");
        return report.toString();
    }

    /**
     * Developer test driver: reads a matrix and its row metadata from hard-coded local paths,
     * assigns higher-level taxa and writes the result to {@code matrixWithHigherLevelTaxa.txt}.
     */
    public static void main(String[] strArr) {
        Timer timer = new Timer();
        timer.startTimer();
        Matrix matrix = new Matrix();
        matrix.readMatrix("/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Results/Quiime/InputEnvSpec/grasslands/test/grasslands.txt", false);
        List<String> metadataNames = DiverseTools.stringToList(UnifracWrapper.QIIME_METADATA_NAMES, CooccurrenceAnalyser.ITEM_SEPARATOR);
        // NOTE(review): the original code also built a list holding "java.lang.String" once per
        // metadata name but never used it; an empty list is what actually gets passed below.
        // Confirm which of the two readRowMetaData expects.
        ArrayList metadataTypes = new ArrayList();
        CooccurrenceAnalyser.LINEAGE_SEPARATOR = "--";
        matrix.readRowMetaData("/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Results/Quiime/InputEnvSpec/grasslands/qiime_db_otu_metadata_grasslands.txt", metadataNames, metadataTypes);
        HigherLevelTaxaAssigner higherLevelTaxaAssigner = new HigherLevelTaxaAssigner();
        higherLevelTaxaAssigner.setInputMatrix(matrix);
        higherLevelTaxaAssigner.setLineageAttrib(CooccurrenceConstants.LINEAGE_ATTRIBUTE);
        higherLevelTaxaAssigner.assignHigherLevelTaxa();
        timer.stopTimer();
        System.out.println("Runtime in ms: " + timer.getRuntime());
        higherLevelTaxaAssigner.getExtendedMatrix().writeMatrix("matrixWithHigherLevelTaxa.txt", "\t", true, true);
    }
}
