package be.ac.vub.bsb.parsers.vdp;

import be.ac.ulb.bigre.pathwayinference.core.core.PathwayinferenceConstants;
import be.ac.ulb.bigre.pathwayinference.core.io.IOTools;
import be.ac.ulb.bigre.pathwayinference.core.io.TwoColumnHashMapParser;
import be.ac.ulb.bigre.pathwayinference.core.util.DiverseTools;
import be.ac.vub.bsb.cooccurrence.conversion.MatrixFilterer;
import be.ac.vub.bsb.cooccurrence.measures.Matrix;
import be.ac.vub.bsb.cooccurrence.util.ArrayTools;
import be.ac.vub.bsb.parsers.ncbi.NCBIDBOnlineQueries;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.math3.geometry.VectorFormat;

/* loaded from: input_file:be/ac/vub/bsb/parsers/vdp/VDPLineageAssigner.class */
/**
 * Assigns a taxonomic lineage to every row (genus or OTU) of an abundance matrix and
 * exports the result as a tab-separated metadata file.
 *
 * <p>Lineages are taken from a small table of hard-coded special cases or, failing that,
 * from {@link NCBIDBOnlineQueries#getOrganismLineageGivenName(String)}. Intermediate
 * taxonomic levels listed in {@link #assembleSubLevelNames()} are dropped so that the
 * remaining levels line up with {@link #LINEAGE_LEVELS}.
 *
 * <p>Typical use: configure the instance through the setters, then call
 * {@link #assignNCBILineages()}.
 */
public class VDPLineageAssigner {
    /** Slash-separated names of the taxonomic levels expected in the output, top level first. */
    public static String LINEAGE_LEVELS = "kingdom/phylum/class/order/family/genus/species";
    private int _minocc = 0;
    private Matrix _vdpGenera = new Matrix();
    private String _metadataLocation = "";
    private String _otuGenusMappingLocation = "";
    private boolean _otus = false;
    private boolean _test = false;
    /** Cache of already resolved lineages, keyed by the normalised genus name. */
    private Map<String, String> _genusVsLineage = new HashMap<String, String>();
    private Map<String, String> _otuVsGenus = new HashMap<String, String>();

    /**
     * Lists the names of intermediate taxonomic groups (sub-/super-ranks, tribes and
     * unranked groups) that are removed from a lineage before it is written out.
     *
     * @return the group names; the set is freshly built on every call
     */
    public static Set<String> assembleSubLevelNames() {
        // The values record the rank of each group for documentation only; callers get the keys.
        Map<String, String> rankByName = new HashMap<String, String>();
        rankByName.put("delta/epsilon subdivisions", "subphylum");
        rankByName.put("Actinobacteridae", "subclass");
        rankByName.put("Coriobacteridae", "subclass");
        rankByName.put("Coriobacterineae", "suborder");
        rankByName.put("Corynebacterineae", "suborder");
        rankByName.put("Chlamydiae/Verrucomicrobia group", "superphylum");
        rankByName.put("Bacteroidetes/Chlorobi group", "superphylum");
        rankByName.put("Actinomycineae", "suborder");
        rankByName.put("Clostridiales incertae sedis", "no rank");
        rankByName.put("Burkholderiales Genera incertae sedis", "no rank");
        rankByName.put("Micrococcineae", "suborder");
        rankByName.put("Mobiluncus/Falcivibrio group", "no rank");
        rankByName.put("Bacillales incertae sedis", "no rank");
        rankByName.put("Chlamydia/Chlamydophila group", "no rank");
        rankByName.put("Frankineae", "suborder");
        rankByName.put("Nitriliruptoridae", "subclass");
        rankByName.put("Propionibacterineae", "suborder");
        rankByName.put("unclassified Solibacterales", "no rank");
        rankByName.put("Fibrobacteres/Acidobacteria group", "superphylum");
        rankByName.put("Nannocystineae", "suborder");
        rankByName.put("Pseudonocardineae", "suborder");
        rankByName.put("Kineosporiineae", "suborder");
        rankByName.put("Micromonosporineae", "suborder");
        rankByName.put("Rubrobacteridae", "subclass");
        rankByName.put("Acidimicrobineae", "suborder");
        rankByName.put("Sorangiineae", "suborder");
        rankByName.put("Cystobacterineae", "suborder");
        rankByName.put("Acidimicrobidae", "subclass");
        rankByName.put("Wolbachieae", "tribe");
        rankByName.put("Streptosporangineae", "suborder");
        rankByName.put("Streptomycineae", "suborder");
        rankByName.put("Zymobacter group", "no rank");
        rankByName.put("Chloroflexineae", "suborder");
        rankByName.put("Rhizobium/Agrobacterium group", "no rank");
        rankByName.put("Chlorobium/Pelodictyon group", "no rank");
        rankByName.put("Moorella group", "no rank");
        rankByName.put("Rubrobacterineae", "suborder");
        rankByName.put("Catenulisporineae", "suborder");
        rankByName.put("Actinopolysporineae", "suborder");
        rankByName.put("Sinorhizobium/Ensifer group", "no rank");
        rankByName.put("Azotobacter group", "no rank");
        rankByName.put("Aneurinibacillus group", "no rank");
        rankByName.put("Chromobacterium group", "no rank");
        rankByName.put("Rickettsiales genera incertae sedis", "no rank");
        rankByName.put("Rickettsieae", "tribe");
        return rankByName.keySet();
    }

    /**
     * Returns the hard-coded lineage of a name that is handled as a special case, without
     * printing anything.
     *
     * @param name normalised genus name
     * @return the hard-coded lineage, or the empty string when {@code name} is not a special case
     */
    private String lookupSpecialCaseLineage(String name) {
        if (name.equals("Lachnospiracea_incertae_sedis")) {
            return isOtus()
                    ? "Bacteria; Firmicutes; Clostridia; Clostridiales; Lachnospiraceae"
                    : "Bacteria; Firmicutes; Clostridia; Clostridiales; Lachnospiraceae; Lachnospiracea_incertae_sedis";
        }
        if (name.equals("Erysipelotrichaceae_incertae_sedis")) {
            return isOtus()
                    ? "Bacteria; Firmicutes; Erysipelotrichia; Erysipelotrichales; Erysipelotrichaceae"
                    : "Bacteria; Firmicutes; Erysipelotrichia; Erysipelotrichales; Erysipelotrichaceae; Erysipelotrichaceae_incertae_sedis";
        }
        if (name.equals("unclassified_Incertae Sedis XI")) {
            return "Bacteria; unclassified_Incertae Sedis XI";
        }
        return "";
    }

    /**
     * Returns the hard-coded lineage of a special-case name and reports the match on
     * standard output.
     *
     * @param str normalised genus name
     * @return the hard-coded lineage, or the empty string when {@code str} is not a special case
     */
    private String specialCaseTreatment(String str) {
        String lineage = lookupSpecialCaseLineage(str);
        if (!lineage.isEmpty()) {
            System.out.println("Special case " + str);
            System.out.println("Setting lineage " + lineage);
        }
        return lineage;
    }

    /**
     * Resolves a lineage for every row of the matrix and writes one tab-separated line per
     * row to {@link #getMetadataLocation()}.
     *
     * <p>Each output line holds the row name, the lineage levels, {@code none} padding up to
     * the number of levels in {@link #LINEAGE_LEVELS}, a {@code --}-joined lineage label and
     * the row name again. Rows without a lineage are written as the bare row name.
     *
     * <p>If {@link #getMinocc()} is positive the matrix is filtered first and the filtered
     * matrix replaces the stored one. In test mode only the first row is processed and
     * nothing is exported.
     */
    public void assignNCBILineages() {
        filterByMinimumOccurrence();
        loadOtuGenusMapping();

        StringBuilder output = new StringBuilder();
        Set<String> subLevelNames = assembleSubLevelNames();
        Set<String> withoutLineage = new HashSet<String>();
        Set<String> tooLong = new HashSet<String>();
        List<String> levels = DiverseTools.stringToList(LINEAGE_LEVELS, "/");
        System.out.println("Expected number of lineage levels: " + levels.size());

        for (String rowName : getVdpGenera().getRowNames()) {
            String genus = normaliseGenus(rowName);
            if (isTest()) {
                System.out.println(genus);
            }
            String lineage = resolveLineage(genus);
            // Special-case lineages start at the kingdom. All other lineages have their first
            // element skipped; the label below strips "cellular organisms; ", so that element
            // is presumably the NCBI root.
            // The flag is derived the same way for cached and freshly resolved names.
            boolean startsAtKingdom = !lookupSpecialCaseLineage(genus).isEmpty();

            String row = rowName;
            // NOTE(review): a null lineage is treated like an empty one; confirm whether the
            // NCBI helper can actually return null.
            if (lineage == null || lineage.isEmpty()) {
                withoutLineage.add(rowName);
                System.err.println("Could not obtain lineage for organism: " + rowName + "!");
            } else {
                row = buildMetadataRow(rowName, genus, lineage, startsAtKingdom, subLevelNames, levels.size(), tooLong);
            }
            if (isTest()) {
                // Test mode inspects the first row only and skips the export.
                System.out.println(row);
                return;
            }
            output.append(row).append("\n");
        }
        System.out.println("No lineage obtained for: " + withoutLineage.toString());
        System.out.println("Too long lineages obtained for: " + tooLong.toString());
        IOTools.exportStringToFile(output.toString(), getMetadataLocation());
    }

    /** Applies the minimum-occurrence row filter when a positive threshold is configured. */
    private void filterByMinimumOccurrence() {
        if (getMinocc() <= 0) {
            return;
        }
        MatrixFilterer filterer = new MatrixFilterer(getVdpGenera());
        filterer.setFilterMethods(MatrixFilterer.ROW_MIN_OCCURRENCE);
        filterer.setFilterNumbers(Integer.toString(getMinocc()));
        filterer.setSumFilteredNonFeatRowsAndAddAsUnclassified(true);
        filterer.filter();
        this._vdpGenera = filterer.getFilteredMatrix();
        System.out.println(filterer.toString());
    }

    /** Parses the OTU-to-genus file (column 0 = key, column 1 = value) when a location is set. */
    private void loadOtuGenusMapping() {
        if (!getOtuGenusMappingLocation().isEmpty()) {
            TwoColumnHashMapParser parser = new TwoColumnHashMapParser(getOtuGenusMappingLocation());
            parser.setKeyColumn(0);
            parser.setValueColumn(1);
            this._otuVsGenus = parser.parse();
            System.out.println("Parsed the genera of " + this._otuVsGenus.keySet().size() + " OTUs.");
        } else if (isOtus()) {
            System.err.println("OTU to genus mapping file required!");
        }
    }

    /**
     * Derives the name used for the lineage lookup from a row name.
     *
     * @param rowName row name of the matrix
     * @return the normalised genus name; empty when the row has no entry in the OTU mapping
     */
    private String normaliseGenus(String rowName) {
        String genus = rowName;
        if (!getOtuGenusMappingLocation().isEmpty()) {
            genus = this._otuVsGenus.get(rowName);
        }
        if (genus == null) {
            // The row is missing from the OTU-to-genus mapping.
            System.err.println("Could not treat correctly " + rowName + ". It is set to empty String.");
            return "";
        }
        if (genus.equals("unclassified_Incertae Sedis XI")) {
            return genus;
        }
        if (genus.contains("unclassified") && genus.contains(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER)) {
            // Keep what follows the joiner, i.e. the taxon the "unclassified" prefix refers to.
            genus = genus.split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER)[1];
        }
        if (genus.contains(" ")) {
            genus = genus.split(" ")[0];
        }
        return genus;
    }

    /** Returns the lineage of a genus from the cache, the special cases or the NCBI lookup, in that order. */
    private String resolveLineage(String genus) {
        if (this._genusVsLineage.containsKey(genus)) {
            return this._genusVsLineage.get(genus);
        }
        String lineage = specialCaseTreatment(genus);
        if (lineage.isEmpty()) {
            lineage = NCBIDBOnlineQueries.getOrganismLineageGivenName(genus);
        }
        this._genusVsLineage.put(genus, lineage);
        return lineage;
    }

    /**
     * Formats the output line of one row that has a lineage.
     *
     * @param rowName row name of the matrix
     * @param genus normalised genus name of the row
     * @param lineage lineage string whose levels are separated by {@link VectorFormat#DEFAULT_SEPARATOR}
     * @param startsAtKingdom whether the first lineage element is kept (otherwise it is skipped)
     * @param subLevelNames names of the intermediate groups to drop
     * @param expectedLevels number of levels in {@link #LINEAGE_LEVELS}
     * @param tooLong collects the names of rows whose lineage exceeds the expected length
     * @return the tab-separated output line without a trailing line break
     */
    private String buildMetadataRow(String rowName, String genus, String lineage, boolean startsAtKingdom,
            Set<String> subLevelNames, int expectedLevels, Set<String> tooLong) {
        List<String> kept = new ArrayList<String>();
        StringBuilder label = new StringBuilder();
        for (String taxon : lineage.split(VectorFormat.DEFAULT_SEPARATOR)) {
            if (!subLevelNames.contains(taxon)) {
                if (!kept.isEmpty()) {
                    label.append(VectorFormat.DEFAULT_SEPARATOR);
                }
                kept.add(taxon);
                label.append(taxon);
            }
        }
        String[] taxa = ArrayTools.m280toArray(kept);
        if (taxa.length > expectedLevels) {
            tooLong.add(rowName);
        }

        StringBuilder columns = new StringBuilder(rowName);
        // Number of lineage columns written so far, not counting the closing row-name column.
        int written = 0;
        for (int index = startsAtKingdom ? 0 : 1; index < taxa.length; index++) {
            columns.append("\t").append(taxa[index]);
            written++;
        }
        if (isOtus()) {
            String otuGenus = this._otuVsGenus.get(rowName);
            columns.append("\t").append(otuGenus);
            label.append("--").append(otuGenus);
            written++;
        }
        String joinedLabel = label.toString().replace("cellular organisms; ", "").replace(VectorFormat.DEFAULT_SEPARATOR, "--");
        if (rowName.contains("unclassified")) {
            columns.append("\t").append(genus).append("\t").append(rowName);
            joinedLabel = joinedLabel + "--" + genus + "--" + rowName;
            written++;
        } else {
            columns.append("\t").append(rowName);
            joinedLabel = joinedLabel + "--" + rowName;
        }
        // written + 1 columns are filled now; pad the remaining levels with "none".
        for (int level = written + 1; level < expectedLevels; level++) {
            columns.append("\tnone");
        }
        // Without OTUs, a line that needed no padding is reported as too long as well.
        if (!isOtus() && !columns.toString().endsWith("none")) {
            tooLong.add(rowName);
        }
        return columns.append("\t").append(joinedLabel).append("\t").append(rowName).toString();
    }

    /** @return the matrix whose rows receive lineages */
    public Matrix getVdpGenera() {
        return this._vdpGenera;
    }

    /** @param matrix the matrix whose rows receive lineages */
    public void setVdpGenera(Matrix matrix) {
        this._vdpGenera = matrix;
    }

    /** @return the path of the metadata file to write */
    public String getMetadataLocation() {
        return this._metadataLocation;
    }

    /** @param str the path of the metadata file to write */
    public void setMetadataLocation(String str) {
        this._metadataLocation = str;
    }

    /** @return the path of the OTU-to-genus mapping file; empty when none is used */
    public String getOtuGenusMappingLocation() {
        return this._otuGenusMappingLocation;
    }

    /** @param str the path of the OTU-to-genus mapping file; empty to use row names directly */
    public void setOtuGenusMappingLocation(String str) {
        this._otuGenusMappingLocation = str;
    }

    /** @return the minimum-occurrence threshold; values of zero or less disable the filter */
    public int getMinocc() {
        return this._minocc;
    }

    /** @param i the minimum-occurrence threshold; values of zero or less disable the filter */
    public void setMinocc(int i) {
        this._minocc = i;
    }

    /** @return whether the rows of the matrix are OTUs */
    public boolean isOtus() {
        return this._otus;
    }

    /** @param z whether the rows of the matrix are OTUs */
    public void setOtus(boolean z) {
        this._otus = z;
    }

    /** @return the OTU-to-genus mapping parsed by the last call to {@link #assignNCBILineages()} */
    public Map<String, String> getOTUVsGenus() {
        return this._otuVsGenus;
    }

    /** @return whether test mode is enabled */
    public boolean isTest() {
        return this._test;
    }

    /** @param z whether to enable test mode (first row only, no export) */
    public void setTest(boolean z) {
        this._test = z;
    }

    /**
     * Runs the assignment on a hard-coded input file.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        // Switches between the two hard-coded data sets; false selects the FMT genera.
        boolean vdpOtuRun = false;
        String input = vdpOtuRun
                ? "/Users/u0097353/Documents/Documents_Karoline/BSB_Lab/Data/VDP_for_karoline/Parsed_Freeze1/vdp_otus_nonrar.txt"
                : "/Users/u0097353/Documents/Documents_Karoline/BSB_Lab/Results/FecalTransfer/Input/FMT_genera.txt";
        Matrix matrix = new Matrix();
        matrix.readMatrix(input, false);
        VDPLineageAssigner assigner = new VDPLineageAssigner();
        assigner.setVdpGenera(matrix);
        if (vdpOtuRun) {
            assigner.setMinocc(100);
            assigner.setOtuGenusMappingLocation("/Users/u0097353/Documents/Documents_Karoline/BSB_Lab/Data/VDP_for_karoline/Parsed_Freeze1/vdp_otu_genus_mapping.txt");
        }
        assigner.setTest(false);
        if (vdpOtuRun) {
            assigner.setMetadataLocation("vdp_ncbi_otu_lineages.txt");
        } else {
            assigner.setMetadataLocation("FMT_ncbi_lineages.txt");
        }
        assigner.assignNCBILineages();
        if (vdpOtuRun) {
            assigner.getVdpGenera().writeMatrix("vdp_otus_nonrar_minocc100.txt", "\t", true, true);
        }
    }
}
