package be.ac.vub.bsb.parsers.util;

import be.ac.ulb.bigre.pathwayinference.core.core.PathwayinferenceConstants;
import be.ac.ulb.bigre.pathwayinference.core.io.IOTools;
import be.ac.ulb.bigre.pathwayinference.core.util.DiverseTools;
import be.ac.ulb.scmbb.snow.graph.core.Data;
import be.ac.vub.bsb.cooccurrence.cmd.OptionNames;
import be.ac.vub.bsb.cooccurrence.conversion.AbundanceMatrixNormalizer;
import be.ac.vub.bsb.cooccurrence.core.CooccurrenceConstants;
import be.ac.vub.bsb.cooccurrence.measures.Matrix;
import be.ac.vub.bsb.cooccurrence.util.HigherLevelTaxaMetadataComplementer;
import be.ac.vub.bsb.cooccurrence.util.ToolBox;
import be.ac.vub.bsb.parsers.ncbi.TaxonomyProvider;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.math3.geometry.VectorFormat;
import org.apache.log4j.Logger;

/* loaded from: input_file:be/ac/vub/bsb/parsers/util/BiomTableParser.class */
public class BiomTableParser extends GenericDelimFlatFileParser {
    // Lineage value that marks an OTU without taxonomic assignment; such rows get
    // EMPTY_METADATUM for every taxonomic level in the metadata file.
    public static String NO_BLAST_HIT = "No blast hit";
    // Minimum number of data cells (columns after the first) that differ from "0" for a row to be kept in the output.
    public static Integer MIN_OCCURRENCE = 1;
    // Header keywords (matched case-insensitively) that signal a trailing taxonomy column.
    public static String TAXONOMY_ENTRY = "taxonomy";
    public static String TAXONOMY_ENTRY_2 = "Consensus Lineage";
    // Placeholder written for a missing or unresolved taxonomic level.
    public static String EMPTY_METADATUM = "none";
    // Not referenced inside this class.
    public static String UNCLASSIFIED = "unclassified";
    // Default separator placed between levels of the lineage string written to the metadata file.
    public static String DEFAULT_LINEAGE_SEPARATOR = "--";
    // Prefix prepended to row identifiers when the taxonomy is not part of the row name.
    public static String OTU_PREFIX = "OTU_";
    // Prefix prepended to every header column name except the first one.
    public static String EXPERIMENT_PREFIX = "SAMPLE_";
    // Not referenced inside this class.
    public static String DOUBLE_UNDERSCORE_EMULATOR = "-";
    // Primary separator used to split an input lineage into levels.
    // NOTE(review): the value is taken from commons-math3 VectorFormat.DEFAULT_SEPARATOR ("; " in that
    // library) - this looks like a decompiler constant substitution; confirm the intended literal.
    public static String QIIME_LINEAGE_SEPARATOR = VectorFormat.DEFAULT_SEPARATOR;
    // Fallback separator used when the primary separator does not occur in the lineage.
    public static String QIIME_ALT_LINEAGE_SEPARATOR = ";";
    // Level prefixes expected in front of each lineage entry (e.g. "k__Bacteria").
    public static String KINGDOM_PREFIX = "k__";
    public static String PHYLUM_PREFIX = "p__";
    public static String CLASS_PREFIX = "c__";
    public static String ORDER_PREFIX = "o__";
    public static String FAMILY_PREFIX = "f__";
    public static String GENUS_PREFIX = "g__";
    public static String SPECIES_PREFIX = "s__";
    // Level prefixes in the order in which lineage entries are consumed; one metadata column is written per entry.
    public static String[] LINEAGE_PREFIXES = {KINGDOM_PREFIX, PHYLUM_PREFIX, CLASS_PREFIX, ORDER_PREFIX, FAMILY_PREFIX, GENUS_PREFIX, SPECIES_PREFIX};
    // Taxonomic level names in the same order as LINEAGE_PREFIXES. Not referenced inside this class.
    public static String[] STANDARD_TAXONOMIC_LEVELS = {TaxonomyProvider.KINGDOM, TaxonomyProvider.PHYLUM, TaxonomyProvider.CLASS, TaxonomyProvider.ORDER, TaxonomyProvider.FAMILY, TaxonomyProvider.GENUS, TaxonomyProvider.SPECIES};
    // Not referenced inside this class.
    public static String[] CLASS_AND_PHYLUM_IDENTICAL = {"Actinobacteria", "Chlamydiae", "Nitrospira"};
    // Writer for the metadata file; created lazily by openMetadataWriter() and closed in parse().
    private PrintWriter _metadataWriter;
    // Target path of the metadata file; an empty string disables metadata output.
    private String _metadataFileLocation = "";
    // Separator used when assembling the lineage string for the metadata file.
    private String _lineageSeparator = DEFAULT_LINEAGE_SEPARATOR;
    // True until the first non-banner line (the header) has been processed.
    private boolean _header = true;
    // Set once the metadata writer has been created successfully.
    private boolean _writerOpened = false;
    // True when the header announces a taxonomy column; also set by parse() when lineages were found in row names.
    private boolean _containsTaxonomy = false;
    // True when a row name containing a lineage separator was seen and no taxonomy column was announced.
    private boolean _taxonomyInRowName = false;
    // Taxon names already emitted as row names; used to detect duplicates.
    private Set<String> _taxonNames = new HashSet();
    // Running number appended to a taxon name that was already emitted, to make it unique.
    private int _taxonCounter = 1;
    // Set when a lineage entry with a parseable "(number)" suffix was seen among the first lines.
    private boolean _hitNumbersProvided = false;
    // Hit numbers stored per (taxon identifier, taxonomic level); filled by parseHitNumber().
    private Data _hitNumberPerTaxon = Data.newData("hit number");
    // Logger named after the string form of this class's package (Package.toString()).
    private Logger _logger = Logger.getLogger(BiomTableParser.class.getPackage().toString());

    /**
     * Creates a parser and runs the superclass initialisation.
     */
    public BiomTableParser() {
        // Explicit super call: always runs the inherited init(), regardless of overrides further down the hierarchy.
        super.init();
    }

    /**
     * Parses the input table line by line using tab as input and output delimiter.
     * <p>
     * Each line is handed to {@link #processLine(String)} through the superclass line loop.
     * After the loop, a lineage detected in the row names is reported through the
     * taxonomy flag as well. The metadata writer, if one was opened, is always closed,
     * even when the line loop terminates with an exception.
     */
    @Override // be.ac.vub.bsb.parsers.util.GenericDelimFlatFileParser, be.ac.vub.bsb.parsers.util.GenericFlatFileParser, be.ac.vub.bsb.parsers.util.IGenericParser
    public void parse() {
        super.setInputDelimiter("\t");
        super.setOutputDelimiter("\t");
        // A symbol that is not expected at the start of any input line, so that
        // '#'-prefixed header lines still reach processLine().
        super.setCommentSymbol("&&&&&&&&&");
        try {
            super.goThroughLines();
            if (this._taxonomyInRowName) {
                this._containsTaxonomy = true;
            }
        } finally {
            // The writer stays null when no metadata was requested or when opening the
            // file failed; closing unconditionally used to raise a NullPointerException.
            if (this._metadataWriter != null) {
                this._metadataWriter.close();
            }
        }
    }

    /**
     * Opens the metadata writer on the configured metadata file location, once.
     * <p>
     * Subsequent calls are no-ops after a successful open. If the file cannot be
     * opened, the failure is logged and the writer stays {@code null}.
     */
    private void openMetadataWriter() {
        if (this._writerOpened) {
            return;
        }
        try {
            this._metadataWriter = new PrintWriter(new BufferedWriter(new FileWriter(this._metadataFileLocation)));
            this._writerOpened = true;
        } catch (IOException e) {
            // Report through the class logger, including the offending path, instead of
            // dumping a bare stack trace to stderr.
            this._logger.error("Could not open metadata file " + this._metadataFileLocation + " for writing.", e);
        }
    }

    /**
     * Splits a lineage entry of the form {@code name(number)} and records the number.
     * <p>
     * The text after the first opening parenthesis (with closing parentheses removed and
     * trimmed) is stored as hit number for the given taxon at the given level, unless an
     * annotation for that taxon and level already exists.
     *
     * @param str  identifier of the taxon/row; special characters are replaced before it is used as key
     * @param str2 lineage entry carrying the hit number in parentheses
     * @param str3 level of the entry; a known level prefix is translated to the corresponding
     *             {@link TaxonomyProvider} level name, anything else is used unchanged
     * @return the part of {@code str2} in front of the first opening parenthesis
     */
    private String parseHitNumber(String str, String str2, String str3) {
        String[] nameAndCount = str2.split("\\(");
        String hitCount = nameAndCount[1].replace(")", "").trim();

        // Translate the level prefix into the level name used as annotation key.
        String level = str3;
        if (level.startsWith(KINGDOM_PREFIX)) {
            level = TaxonomyProvider.KINGDOM;
        }
        if (level.startsWith(PHYLUM_PREFIX)) {
            level = TaxonomyProvider.PHYLUM;
        }
        if (level.startsWith(CLASS_PREFIX)) {
            level = TaxonomyProvider.CLASS;
        } else if (level.startsWith(ORDER_PREFIX)) {
            level = TaxonomyProvider.ORDER;
        } else if (level.startsWith(FAMILY_PREFIX)) {
            level = TaxonomyProvider.FAMILY;
        } else if (level.startsWith(GENUS_PREFIX)) {
            level = TaxonomyProvider.GENUS;
        } else if (level.startsWith(SPECIES_PREFIX)) {
            level = TaxonomyProvider.SPECIES;
        }

        String taxonKey = DiverseTools.replaceSpecialChars(str, ToolBox.getCoNetSpecialCharsReplacementTable());
        // First value wins: an existing annotation is never overwritten.
        boolean alreadyRecorded = this._hitNumberPerTaxon.hasAnnotation(taxonKey, level);
        if (!alreadyRecorded) {
            this._hitNumberPerTaxon.put(taxonKey, level, hitCount);
        }
        return nameAndCount[0];
    }

    /**
     * Strips a trailing {@code (number)} hit count from a lineage entry, if hit counts are in use.
     * <p>
     * While the superclass line counter is below 10, an entry ending in {@code ")"} is probed:
     * if the text after the first {@code "("} parses as an integer, hit numbers are considered
     * provided and the entry is handed to {@link #parseHitNumber(String, String, String)}.
     * On later lines the entry is only parsed when hit numbers were detected before.
     * Entries that end in {@code ")"} but contain no {@code "("} are returned unchanged
     * (they used to cause an {@link ArrayIndexOutOfBoundsException}).
     *
     * @param str  identifier of the taxon/row the entry belongs to
     * @param str2 lineage entry to inspect
     * @param str3 level prefix of the entry
     * @return the entry without hit count when one was parsed, otherwise {@code str2} unchanged
     */
    private String checkHitNumber(String str, String str2, String str3) {
        if (!str2.endsWith(")")) {
            return str2;
        }
        String[] nameAndCount = str2.split("\\(");
        if (nameAndCount.length < 2) {
            // Closing parenthesis without an opening one: there is nothing to parse.
            return str2;
        }
        if (super.getLineCounter() < 10) {
            try {
                Integer.parseInt(nameAndCount[1].replace(")", "").trim());
            } catch (NumberFormatException e) {
                // Parenthesised text is not a number, so it belongs to the taxon name.
                return str2;
            }
            this._hitNumbersProvided = true;
            return parseHitNumber(str, str2, str3);
        }
        if (this._hitNumbersProvided) {
            return parseHitNumber(str, str2, str3);
        }
        return str2;
    }

    /**
     * Converts one line of the input table and, where applicable, writes the matching metadata row.
     * <p>
     * Banner lines (containing "Constructed from biom file" or "otu table") are dropped.
     * The first remaining line is treated as header: a taxonomy column is detected by keyword,
     * the trailing taxonomy column is removed and all but the first column name receive
     * {@link #EXPERIMENT_PREFIX}. For data lines, the taxonomy is taken either from the last
     * column or from the row name; the row is emitted only if at least {@link #MIN_OCCURRENCE}
     * cells after the first differ from "0". When a metadata file location is set and the
     * writer could be opened, one metadata row (identifier, one column per lineage level,
     * lineage string, taxon) is written per data line.
     *
     * @param str raw input line
     * @return the converted line terminated by a newline, or an empty string when the line is dropped
     */
    @Override // be.ac.vub.bsb.parsers.util.GenericDelimFlatFileParser
    protected String processLine(String str) {
        // Banner lines carry no data and do not consume the header state.
        if (str.contains("Constructed from biom file") || str.toLowerCase().contains("otu table")) {
            return "";
        }

        String outputLine;
        String rowBuffer = "";
        String lineage = "";
        String taxonName = "";
        int nonZeroCells = 0;

        if (this._header) {
            String lowered = str.toLowerCase();
            if (lowered.contains(TAXONOMY_ENTRY.toLowerCase()) || lowered.contains(TAXONOMY_ENTRY_2.toLowerCase())) {
                this._containsTaxonomy = true;
            }
            if (this._containsTaxonomy && !this._metadataFileLocation.isEmpty()) {
                openMetadataWriter();
            }
        }

        String[] cells = str.split(getInputDelimiter());
        if (this._header) {
            // The trailing taxonomy column is not part of the abundance matrix.
            int columnCount = this._containsTaxonomy ? cells.length - 1 : cells.length;
            for (int col = 0; col < columnCount; col++) {
                if (col == 0) {
                    rowBuffer = rowBuffer + "\t" + cells[col];
                } else {
                    rowBuffer = rowBuffer + "\t" + EXPERIMENT_PREFIX + cells[col];
                }
            }
            if (rowBuffer.startsWith("\t")) {
                rowBuffer = rowBuffer.substring(1);
            }
            if (rowBuffer.startsWith("#")) {
                rowBuffer = rowBuffer.substring(1);
            }
            outputLine = rowBuffer + "\n";
        } else {
            boolean rowNameHasLineage = cells[0].contains(QIIME_LINEAGE_SEPARATOR) || cells[0].contains(QIIME_ALT_LINEAGE_SEPARATOR);
            if (rowNameHasLineage && !this._containsTaxonomy) {
                this._taxonomyInRowName = true;
                if (!this._metadataFileLocation.isEmpty()) {
                    openMetadataWriter();
                }
            }
            for (int col = 0; col < cells.length; col++) {
                if (this._containsTaxonomy) {
                    // Skip the trailing taxonomy column in the matrix output.
                    if (col < cells.length - 1) {
                        rowBuffer = rowBuffer + "\t" + cells[col];
                    }
                } else if (this._taxonomyInRowName && col == 0) {
                    // The row name is a lineage: use its most specific resolved level as row name.
                    String rowName = cells[col];
                    String[] nameParts = rowName.contains(QIIME_LINEAGE_SEPARATOR) ? rowName.split(QIIME_LINEAGE_SEPARATOR) : rowName.split(QIIME_ALT_LINEAGE_SEPARATOR);
                    taxonName = nameParts[nameParts.length - 1].trim();
                    if (taxonName.contains("__") && !taxonName.endsWith("__")) {
                        taxonName = taxonName.split("__")[1];
                    } else if (taxonName.endsWith("__") || taxonName.equals("Other")) {
                        // Walk towards the root until a resolved level is found.
                        int candidate = nameParts.length - 2;
                        while (taxonName.endsWith("__") || taxonName.endsWith("Other")) {
                            if (candidate > 0) {
                                taxonName = nameParts[candidate];
                            } else if (candidate == 0) {
                                boolean rootUnresolved = nameParts[0].endsWith("__") || nameParts[0].endsWith("Other");
                                taxonName = rootUnresolved ? EMPTY_METADATUM : nameParts[0];
                            } else {
                                // Single-element lineage without any resolved level; previously
                                // this indexed the array at -1.
                                taxonName = EMPTY_METADATUM;
                            }
                            candidate--;
                        }
                    } else if (taxonName.isEmpty()) {
                        taxonName = EMPTY_METADATUM;
                    }
                    // Make repeated taxon names unique with a running number.
                    if (this._taxonNames.contains(taxonName)) {
                        taxonName = taxonName + this._taxonCounter;
                        this._taxonCounter++;
                    }
                    this._taxonNames.add(taxonName);
                    rowBuffer = rowBuffer + "\t" + taxonName;
                } else {
                    rowBuffer = rowBuffer + "\t" + cells[col];
                }
                // NOTE(review): this check runs over every column after the first, so a trailing
                // taxonomy column is counted as well - confirm whether that is intended.
                if (col > 0 && !cells[col].equals("0")) {
                    nonZeroCells++;
                }
            }
            if (rowBuffer.startsWith("\t")) {
                rowBuffer = rowBuffer.substring(1);
            }
            if (nonZeroCells >= MIN_OCCURRENCE.intValue()) {
                outputLine = this._taxonomyInRowName ? rowBuffer + "\n" : OTU_PREFIX + rowBuffer + "\n";
            } else {
                outputLine = "";
            }
        }

        if ((this._containsTaxonomy || this._taxonomyInRowName) && !this._header) {
            String metadataRow;
            String rawLineage;
            if (this._containsTaxonomy) {
                metadataRow = OTU_PREFIX + cells[0];
                taxonName = metadataRow;
                rawLineage = cells[cells.length - 1];
            } else {
                rawLineage = cells[0];
                metadataRow = taxonName;
            }
            String[] lineageParts = rawLineage.contains(QIIME_LINEAGE_SEPARATOR) ? rawLineage.split(QIIME_LINEAGE_SEPARATOR) : rawLineage.split(QIIME_ALT_LINEAGE_SEPARATOR);
            String[] levelPrefixes = LINEAGE_PREFIXES;
            if (rawLineage.equals(NO_BLAST_HIT)) {
                for (int level = 0; level < levelPrefixes.length; level++) {
                    metadataRow = metadataRow + "\t" + EMPTY_METADATUM;
                }
                lineage = CooccurrenceConstants.INTERACTION_TYPE_UNKNOWN;
            } else {
                String separator = getLineageSeparator();
                for (int level = 0; level < levelPrefixes.length; level++) {
                    // Lineages shorter than the number of levels are padded with the placeholder.
                    String rawEntry = level > lineageParts.length - 1 ? EMPTY_METADATUM : lineageParts[level];
                    if (rawEntry.endsWith("\"")) {
                        rawEntry = rawEntry.replace("\"", "");
                    }
                    String entry = checkHitNumber(taxonName, rawEntry, levelPrefixes[level]);
                    String levelValue;
                    if (!entry.contains("__")) {
                        levelValue = entry;
                    } else if (entry.endsWith("__") || entry.equals("Other")) {
                        levelValue = EMPTY_METADATUM;
                    } else {
                        levelValue = entry.split("__")[1];
                    }
                    if (levelValue.isEmpty()) {
                        levelValue = EMPTY_METADATUM;
                    }
                    metadataRow = metadataRow + "\t" + levelValue;
                    if (!levelValue.equals(EMPTY_METADATUM)) {
                        lineage = lineage + separator + levelValue;
                    }
                }
                // Remove the leading separator literally; the separator is configurable and
                // must not be interpreted as a regular expression.
                if (lineage.startsWith(separator)) {
                    lineage = lineage.substring(separator.length());
                }
            }
            if (lineage.endsWith("-- ")) {
                lineage = lineage.substring(0, lineage.length() - 3);
            }
            String metadataLine;
            if (this._taxonomyInRowName) {
                metadataLine = metadataRow + "\t" + lineage + "\t" + taxonName + "\n";
            } else {
                metadataLine = metadataRow + "\t" + lineage + getLineageSeparator() + taxonName + "\t" + taxonName + "\n";
            }
            // The writer is null when opening the metadata file failed; skip instead of crashing.
            if (!this._metadataFileLocation.isEmpty() && this._metadataWriter != null) {
                this._metadataWriter.print(metadataLine);
                this._metadataWriter.flush();
            }
        }
        this._header = false;
        return outputLine;
    }

    /**
     * Tells whether taxonomy information was found in the table.
     *
     * @return {@code true} if the header announced a taxonomy column, or, after
     *         {@code parse()} has finished, if lineages were detected in the row names
     */
    public boolean isContainsTaxonomy() {
        return _containsTaxonomy;
    }

    /**
     * Tells whether lineage entries with a numeric hit count in parentheses were detected.
     *
     * @return {@code true} if hit numbers were found while parsing
     */
    public boolean isContainsHitNumber() {
        return _hitNumbersProvided;
    }

    /**
     * Gives access to the hit numbers collected during parsing.
     *
     * @return the data container holding one hit number per taxon identifier and level
     *         (the internal instance, not a copy)
     */
    public Data getHitNumbers() {
        return _hitNumberPerTaxon;
    }

    /**
     * @return the path the metadata file is written to; empty if metadata output is disabled
     */
    public String getMetadataFileLocation() {
        return _metadataFileLocation;
    }

    /**
     * Sets the path the metadata file is written to.
     *
     * @param str metadata file path; an empty string disables metadata output
     */
    public void setMetadataFileLocation(String str) {
        _metadataFileLocation = str;
    }

    /**
     * @return the separator placed between levels of the lineage string in the metadata file
     */
    public String getLineageSeparator() {
        return _lineageSeparator;
    }

    /**
     * Sets the separator placed between levels of the lineage string in the metadata file.
     *
     * @param str the lineage separator
     */
    public void setLineageSeparator(String str) {
        _lineageSeparator = str;
    }

    /**
     * @return the logger used by this parser
     */
    public Logger getLogger() {
        return _logger;
    }

    /**
     * Replaces the logger used by this parser.
     *
     * @param logger the logger to use from now on
     */
    public void setLogger(Logger logger) {
        _logger = logger;
    }

    /**
     * Adds higher-level taxa to a matrix and its row metadata and writes both to file.
     * <p>
     * Optionally normalises the matrix columns by sum division first. The row metadata is
     * loaded from {@code str} with nine string-typed attributes (seven taxonomic levels,
     * lineage and taxon), complemented with higher-level taxa entries, and written to
     * {@code str2}; the resulting matrix is written tab-delimited to {@code str3}.
     *
     * @param matrix abundance matrix with taxa on rows
     * @param z      whether to normalise the matrix columns before assigning higher-level taxa
     * @param str    location of the row metadata file to load
     * @param str2   location the complemented metadata is written to
     * @param str3   location the resulting matrix is written to
     */
    public static void assignHigherLevelTaxaAndComplementLineages(Matrix matrix, boolean z, String str, String str2, String str3) {
        Matrix workingMatrix = matrix;
        if (z) {
            AbundanceMatrixNormalizer normalizer = new AbundanceMatrixNormalizer(workingMatrix);
            normalizer.setStandardizationMethods(AbundanceMatrixNormalizer.COLUMN_NORMALIZATION_BY_SUMDIVISION);
            normalizer.normalize();
            workingMatrix = normalizer.getNormalizedAbundanceMatrix();
        }

        HigherLevelTaxaMetadataComplementer complementer = new HigherLevelTaxaMetadataComplementer();
        complementer.setMatrix(workingMatrix);
        complementer.setAssignHigherLevelTaxa(true);
        complementer.setOnRows(true);
        complementer.setTreatSpecialChars(false);
        complementer.setMetadataFileRows(str);
        complementer.setFillPredefTaxonLevels(true);

        // Typed lists: iterating a raw list with a String loop variable does not compile.
        ArrayList<String> attributes = new ArrayList<String>();
        attributes.add("kingdom");
        attributes.add("phylum");
        attributes.add("class");
        attributes.add(PathwayinferenceConstants.ORDER);
        // NOTE(review): sits in the position of the family level; presumably the constant's
        // value is "family" (decompiler constant substitution) - confirm.
        attributes.add(OptionNames.errorDistribution);
        attributes.add("genus");
        attributes.add("species");
        attributes.add(CooccurrenceConstants.LINEAGE_ATTRIBUTE);
        attributes.add(CooccurrenceConstants.TAXON_ATTRIBUTE);

        // Every attribute is read as a string.
        ArrayList<String> attributeTypes = new ArrayList<String>();
        for (int i = 0; i < attributes.size(); i++) {
            attributeTypes.add("java.lang.String");
        }

        complementer.setAttributes(attributes);
        complementer.setAttribTypes(attributeTypes);
        complementer.loadMetaDataFromFile();
        complementer.addHigherLevelTaxaEntriesInMetadata();
        complementer.writeMetaDataToFile(str2);
        complementer.getMatrix().writeMatrix(str3, "\t", true, true);
    }

    /**
     * Manual smoke test: parses a table, reloads the result with its metadata and prints
     * the lineage of one row.
     * <p>
     * Output files are named after the input file (".txt" replaced by "_parsed") and are
     * created relative to the working directory.
     *
     * @param strArr optional arguments: {@code [0]} input table location (defaults to the
     *               original developer path), {@code [1]} name of the row whose lineage is
     *               printed (defaults to "OTU_85")
     */
    public static void main(String[] strArr) {
        boolean hasInputArg = strArr != null && strArr.length > 0;
        boolean hasRowArg = strArr != null && strArr.length > 1;
        String inputLocation = hasInputArg ? strArr[0] : "/Users/u0097353/Documents/Documents_Karoline/BSB_Lab/CoNetProject/ErrorReports/Francisco-Coelho/data/Bacteria_table.txt";
        String rowName = hasRowArg ? strArr[1] : "OTU_85";

        String outputStem = IOTools.getFileWithoutDir(inputLocation).replace(".txt", "_parsed");
        String parsedLocation = outputStem + ".txt";
        String metadataLocation = outputStem + "_metadata.txt";

        BiomTableParser biomTableParser = new BiomTableParser();
        biomTableParser.setInputLocation(inputLocation);
        biomTableParser.setOutputLocation(parsedLocation);
        biomTableParser.setMetadataFileLocation(metadataLocation);
        biomTableParser.parse();
        System.out.println("Taxonomy provided? " + biomTableParser.isContainsTaxonomy());
        System.out.println("Hit number provided? " + biomTableParser.isContainsHitNumber());

        Matrix matrix = new Matrix();
        matrix.readMatrix(parsedLocation, false);
        System.out.println(matrix.getMatrix().rows() + " x " + matrix.getMatrix().columns());

        List<String> attributes = DiverseTools.stringToList("kingdom/phylum/class/order/family/genus/species/lineage/taxon", "/");
        // Every metadata attribute is read as a string. A typed list is required here:
        // iterating a raw list with a String loop variable does not compile.
        ArrayList<String> attributeTypes = new ArrayList<String>();
        for (int i = 0; i < attributes.size(); i++) {
            attributeTypes.add("java.lang.String");
        }
        matrix.readRowMetaData(metadataLocation, attributes, attributeTypes);

        int indexOfRowName = matrix.getIndexOfRowName(rowName);
        System.out.println(indexOfRowName);
        System.out.println(matrix.getRowMetaAnnotation(indexOfRowName, CooccurrenceConstants.LINEAGE_ATTRIBUTE));
    }
}
