package be.ac.vub.bsb.parsers.curtis;

import be.ac.ulb.bigre.pathwayinference.core.io.IOTools;
import be.ac.vub.bsb.cooccurrence.conversion.AbundanceMatrixNormalizer;
import be.ac.vub.bsb.cooccurrence.conversion.MatrixFilterer;
import be.ac.vub.bsb.cooccurrence.conversion.Preprocessor;
import be.ac.vub.bsb.cooccurrence.conversion.WittenBellSmoother;
import be.ac.vub.bsb.cooccurrence.measures.Matrix;
import be.ac.vub.bsb.parsers.hmp.HMP16SRNAPatSchlossParser;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import org.apache.log4j.Logger;

/* JADX WARN: Classes with same name are omitted:
  input_file:be/ac/vub/bsb/parsers/curtis/PhylotypeMatrixEmbellisher.class
 */
/* loaded from: input_file:lib/be_ac_vub_bsb_parsers.jar:be/ac/vub/bsb/parsers/curtis/PhylotypeMatrixEmbellisher.class */
/**
 * Takes a phylotype abundance matrix, renames its sample columns, runs it through a configurable
 * {@link Preprocessor} and then hands it to an {@link HMP16SRNAPatSchlossParser}, whose output
 * matrix is stored as the "embellished" phylotype matrix.
 *
 * <p>Typical use: construct with a matrix, call {@link #setMetadataFolder(String)} and
 * {@link #configPreprocessor(Integer, boolean, boolean, boolean)}, then {@link #embellish()} and
 * finally read the result with {@link #getEmbellishedPhylotypeMatrix()}.
 */
public class PhylotypeMatrixEmbellisher {
    /** Suffix appended to every unique sample (column) name in {@link #embellish()}. */
    private static final String DATASET_SUFFIX = ".may1";

    /** Marker removed from sample (column) names wherever it occurs. */
    private static final String SAMPLE_ID_MARKER = "id_";

    /** Input matrix read by {@link #main(String[])} when no first argument is given. */
    private static final String DEFAULT_INPUT_MATRIX =
            "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/HMP/HMP_full/Curtis_Phylotypes/PatSchloss3.0/hmp_v35_phylotypes.txt";

    /** Metadata folder used by {@link #main(String[])} when no second argument is given. */
    private static final String DEFAULT_METADATA_FOLDER =
            "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/HMP/HMP_full/HMP_public_data/16S/PatSchloss3.0";

    private Matrix _phylotypeMatrix;
    private Matrix _embellishedPhylotypeMatrix;
    private String _parserInfo;
    private String _metadataFolder;
    private int _filterSite;
    private boolean _crossbodysite;
    private Preprocessor _preprocessor;
    private String _filterLevel;
    private final Logger _logger;

    /**
     * Creates an embellisher with empty matrices, an unconfigured preprocessor, no metadata
     * folder, no filter level, filter site 0 and cross-bodysite mode switched off.
     */
    public PhylotypeMatrixEmbellisher() {
        this._phylotypeMatrix = new Matrix();
        this._embellishedPhylotypeMatrix = new Matrix();
        this._parserInfo = "";
        this._metadataFolder = "";
        this._filterSite = 0;
        this._crossbodysite = false;
        this._preprocessor = new Preprocessor();
        this._filterLevel = "";
        // The logger is named after the package (as rendered by Package.toString()), not the class.
        this._logger = Logger.getLogger(getClass().getPackage().toString());
    }

    /**
     * Creates an embellisher with the same defaults as the no-argument constructor, operating on
     * the given matrix.
     *
     * @param matrix the phylotype matrix to embellish
     */
    public PhylotypeMatrixEmbellisher(Matrix matrix) {
        this();
        this._phylotypeMatrix = matrix;
    }

    /**
     * Replaces the current preprocessor with a freshly configured one.
     *
     * <p>Inconsistent settings are reported through the logger but do not abort configuration:
     * when both {@code z} and {@code z2} are set, down-sampling is the method that gets installed.
     *
     * @param num minimum column sum handed to the {@code col_minsum} filter; a filter is only
     *     installed when this is greater than zero. {@code null} is treated as 0 (no filter).
     * @param z whether to normalize columns by down-sampling
     * @param z2 whether to normalize columns by sum division (used only when {@code z} is false)
     * @param z3 whether to install a {@link WittenBellSmoother}
     */
    public void configPreprocessor(Integer num, boolean z, boolean z2, boolean z3) {
        int minReads = (num == null) ? 0 : num.intValue();
        if (z && z2) {
            this._logger.error("Down-sampling and normalization are exclusive!");
        }
        if (z && minReads <= 0) {
            this._logger.error("Downsampling requires a minimum read number!");
        }
        this._preprocessor = new Preprocessor();
        if (minReads > 0) {
            MatrixFilterer filterer = new MatrixFilterer();
            filterer.setFilterMethods("col_minsum");
            filterer.setFilterNumbers(Integer.toString(minReads));
            this._preprocessor.setFilterer(filterer);
        }
        if (z || z2) {
            AbundanceMatrixNormalizer normalizer = new AbundanceMatrixNormalizer();
            if (z) {
                normalizer.setStandardizationMethods(AbundanceMatrixNormalizer.COLUMN_NORMALIZATION_BY_DOWN_SAMPLING);
            } else {
                normalizer.setStandardizationMethods(AbundanceMatrixNormalizer.COLUMN_NORMALIZATION_BY_SUMDIVISION);
            }
            this._preprocessor.setNormalizer(normalizer);
        }
        if (z3) {
            this._preprocessor.setSmoother(new WittenBellSmoother());
        }
        this._preprocessor.setPreprocessingSteps(allPreprocessingSteps());
    }

    /**
     * Builds the step list registered with the preprocessor: filter, normalizer, smoother, in
     * that order, regardless of which components were actually installed.
     *
     * <p>The list is deliberately left raw: neither the type of the {@code Preprocessor} step
     * constants nor the parameter type of {@code setPreprocessingSteps} is visible from this
     * class, so adding type arguments here could break compilation.
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static ArrayList allPreprocessingSteps() {
        ArrayList steps = new ArrayList();
        steps.add(Preprocessor.FILTER);
        steps.add(Preprocessor.NORMALIZER);
        steps.add(Preprocessor.SMOOTHER);
        return steps;
    }

    /**
     * Renames the sample columns, preprocesses the matrix and runs the 16S parser on it.
     *
     * <p>Afterwards the parser's output matrix is available through
     * {@link #getEmbellishedPhylotypeMatrix()}, the parser's textual description is kept for
     * {@link #toString()}, and the internal phylotype matrix has been replaced by its
     * preprocessed version.
     */
    public void embellish() {
        HashSet<String> seenSamples = new HashSet<String>();
        HashSet<Integer> duplicateColumns = new HashSet<Integer>();
        int columnCount = this._phylotypeMatrix.getMatrix().columns();
        for (int col = 0; col < columnCount; col++) {
            String sample = this._phylotypeMatrix.getColName(col).replace(SAMPLE_ID_MARKER, "");
            if (seenSamples.add(sample)) {
                this._phylotypeMatrix.setColName(col, sample + DATASET_SUFFIX);
            } else {
                // Repeated samples are only reported; their column keeps its original name.
                duplicateColumns.add(Integer.valueOf(col));
                this._logger.error("Sample " + sample + " appears more than once in the dataset!");
            }
        }
        this._logger.info("Double indices: " + duplicateColumns.toString());

        this._preprocessor.setMatrix(this._phylotypeMatrix);
        this._preprocessor.preprocess();
        this._phylotypeMatrix = this._preprocessor.getPreprocessedAbundanceMatrix();

        HMP16SRNAPatSchlossParser parser = new HMP16SRNAPatSchlossParser();
        parser.setWgsMatrix(this._phylotypeMatrix);
        parser.setDataFolder(getMetadataFolder());
        this._logger.info("Metadata folder: " + parser.getDataFolder());
        parser.setPhylotype(true);
        parser.setAcceptUnclassifiedTaxa(true);
        parser.setAddPrivateMetadata(true);
        parser.setIncludeMetadata(true);
        // The three body-site options all follow the cross-bodysite switch.
        boolean crossBodysite = isCrossbodysite();
        parser.setBodysiteSpecTaxaAllowingNaN(crossBodysite);
        parser.setNaContainingBodysiteSpecWithCrossLinks(crossBodysite);
        parser.setBodysiteSpecTaxa(crossBodysite);
        parser.setMerge(false);
        parser.setMergeRepetitiveTaxa(false);
        parser.setReplicaTreatment(HMP16SRNAPatSchlossParser.NONE);
        parser.setSeqWindow(35);
        parser.setSequenceIdentityThreshold(100.0d);
        parser.setFilterSite(getFilterSite());
        parser.setRemoveSuspiciousSamples(true);
        parser.parse();
        parser.filterZeroLines();

        // Taxon filtering is optional: a null or empty level means "do not filter".
        String level = getFilterLevel();
        if (level != null && !level.isEmpty()) {
            parser.filterTaxa(level);
        }
        this._parserInfo = parser.toString();
        setEmbellishedPhylotypeMatrix(parser.getOutputMatrix());
    }

    /**
     * @param matrix the matrix to expose as the embellished result
     */
    public void setEmbellishedPhylotypeMatrix(Matrix matrix) {
        this._embellishedPhylotypeMatrix = matrix;
    }

    /**
     * @return the parser output stored by {@link #embellish()}, or an empty matrix if it has not
     *     run and no matrix was set explicitly
     */
    public Matrix getEmbellishedPhylotypeMatrix() {
        return this._embellishedPhylotypeMatrix;
    }

    /**
     * @param str folder passed to the 16S parser as its data folder
     */
    public void setMetadataFolder(String str) {
        this._metadataFolder = str;
    }

    /**
     * @return folder passed to the 16S parser as its data folder
     */
    public String getMetadataFolder() {
        return this._metadataFolder;
    }

    /**
     * @param z value forwarded to the parser's three body-site options in {@link #embellish()}
     */
    public void setCrossbodysite(boolean z) {
        this._crossbodysite = z;
    }

    /**
     * @return whether cross-bodysite mode is switched on
     */
    public boolean isCrossbodysite() {
        return this._crossbodysite;
    }

    /**
     * @param preprocessor preprocessor to run in {@link #embellish()}
     */
    public void setPreprocessor(Preprocessor preprocessor) {
        this._preprocessor = preprocessor;
    }

    /**
     * @return preprocessor run in {@link #embellish()}
     */
    public Preprocessor getPreprocessor() {
        return this._preprocessor;
    }

    /**
     * @param i filter site forwarded to the 16S parser
     */
    public void setFilterSite(int i) {
        this._filterSite = i;
    }

    /**
     * @return filter site forwarded to the 16S parser
     */
    public int getFilterSite() {
        return this._filterSite;
    }

    /**
     * @param str level passed to the parser's taxon filter; empty or {@code null} disables it
     */
    public void setFilterLevel(String str) {
        this._filterLevel = str;
    }

    /**
     * @return level passed to the parser's taxon filter; empty when filtering is disabled
     */
    public String getFilterLevel() {
        return this._filterLevel;
    }

    /**
     * Renders a {@code #}-prefixed, line-oriented report: header, current date, parameters,
     * matrix dimensions and finally the description of the 16S parser captured by
     * {@link #embellish()}.
     *
     * @return the multi-line configuration report
     */
    @Override
    public String toString() {
        StringBuilder report = new StringBuilder();
        report.append("# ").append("HMP 16S phylotype Data Parser\n");
        report.append("# Date=").append(new Date().toString()).append("\n");
        report.append("# PARAMETER\n");
        report.append("# Cross-bodysite matrix=").append(isCrossbodysite()).append("\n");
        report.append("# Sample-wise preprocessing steps (carried out prior to sample filtering and reformatting into cross-bodysite matrix)")
                .append(getPreprocessor().toString())
                .append("\n");
        report.append("# Phylogenetic level filter=").append(getFilterLevel()).append("\n");
        report.append("# OUTPUT\n");
        // NOTE(review): both counts are read from the (preprocessed) phylotype matrix, not from the
        // embellished matrix, although the labels say "output matrix" - confirm which is intended.
        report.append("# Number of rows (taxa and, if included, features) in output matrix=")
                .append(this._phylotypeMatrix.getMatrix().rows())
                .append("\n");
        report.append("# Number of columns (samples) in output matrix=")
                .append(this._phylotypeMatrix.getMatrix().columns())
                .append("\n");
        report.append("# CONFIGURATION OF 16S PARSER (USED FOR FILTERING AND REFORMATTING)\n");
        report.append(this._parserInfo);
        return report.toString();
    }

    /**
     * Command-line driver: reads a phylotype matrix, embellishes it in cross-bodysite mode without
     * preprocessing, and writes {@code PhylotypeParserConfig.txt} and
     * {@code hmp_v35_phylotypes_bodysites.txt}.
     *
     * @param strArr optional overrides: {@code [0]} path of the input matrix, {@code [1]} metadata
     *     folder; the built-in default locations are used for any argument that is missing
     */
    public static void main(String[] strArr) {
        int argCount = (strArr == null) ? 0 : strArr.length;
        String inputMatrixPath = (argCount > 0) ? strArr[0] : DEFAULT_INPUT_MATRIX;
        String metadataFolder = (argCount > 1) ? strArr[1] : DEFAULT_METADATA_FOLDER;

        Matrix input = new Matrix();
        input.readMatrix(inputMatrixPath, false);

        PhylotypeMatrixEmbellisher embellisher = new PhylotypeMatrixEmbellisher(input);
        embellisher.setMetadataFolder(metadataFolder);
        embellisher.configPreprocessor(Integer.valueOf(0), false, false, false);
        embellisher.setCrossbodysite(true);
        embellisher.embellish();

        Matrix result = embellisher.getEmbellishedPhylotypeMatrix();
        IOTools.exportStringToFile(embellisher.toString(), "PhylotypeParserConfig.txt");
        result.writeMatrix("hmp_v35_phylotypes_bodysites.txt", "\t", true, true);
    }
}
