package be.ac.vub.bsb.parsers.hmp;

import be.ac.ulb.bigre.pathwayinference.core.core.PathwayinferenceConstants;
import be.ac.ulb.bigre.pathwayinference.core.io.IOTools;
import be.ac.ulb.bigre.pathwayinference.core.io.OneColumnSetParser;
import be.ac.ulb.bigre.pathwayinference.core.io.TwoColumnHashMapParser;
import be.ac.ulb.bigre.pathwayinference.core.util.DiverseTools;
import be.ac.vub.bsb.cooccurrence.conversion.AbundanceMatrixNormalizer;
import be.ac.vub.bsb.cooccurrence.conversion.MatrixFilterer;
import be.ac.vub.bsb.cooccurrence.measures.MatrixToolsProvider;
import be.ac.vub.bsb.cooccurrence.measures.StatsProvider;
import be.ac.vub.bsb.cooccurrence.util.ArrayTools;
import be.ac.vub.bsb.cooccurrence.util.FeatureMatrixLoader;
import be.ac.vub.bsb.cooccurrence.util.Timer;
import be.ac.vub.bsb.cooccurrence.util.VectorToolsProvider;
import be.ac.vub.bsb.parsers.ncbi.TaxonomyProvider;
import cern.colt.matrix.DoubleMatrix1D;
import cern.colt.matrix.impl.AbstractFormatter;
import cern.colt.matrix.impl.DenseDoubleMatrix1D;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.commons.cli.HelpFormatter;
import org.apache.log4j.Logger;
import org.ujmp.core.Matrix;
import org.ujmp.core.MatrixFactory;
import org.ujmp.core.enums.FileFormat;
import org.ujmp.core.exceptions.MatrixException;

/* loaded from: input_file:be/ac/vub/bsb/parsers/hmp/HMP16SRNAPatSchlossParser.class */
public class HMP16SRNAPatSchlossParser {
    private boolean _includeMetadata = false;
    // Per-sample metadata: key is "<first column>.<second column>" of the metadata table,
    // value is the list of the remaining column values of that row (filled by loadMetaData).
    private Map<String, List<String>> _sampleVersusMetadataLookup = new TreeMap();
    // Groups sample column names that share the same patient/body-site key (filled by treatTechnicalReplica).
    private Map<String, Set<String>> _technicalReplica = new HashMap();
    // Patient identifier -> sample key, recorded while merging columns patient-wise.
    private Map<String, String> _idVersusSampleId = new HashMap();
    // Metadata rows (row name -> one value per matrix column) that are appended below the count rows.
    private TreeMap<String, double[]> _metadataTempMatrix = new TreeMap<>();
    // Raw metadata table as imported from the metadata file.
    private Matrix _metadataMatrix = MatrixFactory.emptyMatrix();
    private String _dataFolder = "";
    private be.ac.vub.bsb.cooccurrence.measures.Matrix _outputMatrix = new be.ac.vub.bsb.cooccurrence.measures.Matrix();
    private be.ac.vub.bsb.cooccurrence.measures.Matrix _wgsMatrix = new be.ac.vub.bsb.cooccurrence.measures.Matrix();
    // NOTE(review): the logger is named after the package, not the class.
    private Logger _logger = Logger.getLogger(getClass().getPackage().toString());
    private int _seqWindow = HMP16SRNAPatSchlossParsingHelper.DEFAULT_16SREGION;
    private boolean _bodysiteSpecTaxa = false;
    private boolean _bodysiteSpecTaxaAllowingNaN = false;
    private boolean _naContainingBodysiteSpecWithCrossLinks = false;
    private boolean _merge = false;
    private boolean _phylotype = false;
    private double _sequenceIdentityThreshold = HMP16SRNAPatSchlossParsingHelper.DEFAULT_SEQUENCE_ID_THRESHOLD;
    private String _mergeTaxonomicLevel = "";
    // One of the replica treatment names declared among the static constants (MEDIAN, MEAN, RANDOM_CHOICE, NONE).
    private String _replicaTreatment = DEFAULT_TECHNICAL_REPLICA_TREATMENT;
    // Incremented by both filterSite and filterOverlap for every column they drop on metadata grounds.
    private int _numOverlappingSamples = 0;
    private boolean _filterZeroLines = false;
    // Names of rows whose entries are all zero, as collected by filterZeroLines.
    private List<String> _zeroLines = new ArrayList();
    private List<String> _unclassifiedLines = new ArrayList();
    private List<String> _samplesWithMissingMetadata = new ArrayList();
    private boolean _higherLevelTaxaFiltered = false;
    private boolean _mergeRepetitiveTaxa = false;
    private boolean _acceptUnclassifiedTaxa = false;
    // Presumably read through getFilterSite(): filterSite drops Houston samples for 1 and
    // St. Louis samples for 2 — TODO confirm the getter returns this field.
    private int _filterSite = 0;
    private boolean _addPrivateMetadata = false;
    private Timer _timer = new Timer();
    private be.ac.vub.bsb.cooccurrence.measures.Matrix _normalizedCountMatrix = new be.ac.vub.bsb.cooccurrence.measures.Matrix();
    private boolean _normalizedCountMatrixSet = false;
    private String _fileWithColumnsToRemove = "";
    private String _fileWithTotalCountsByWhichToDivide = "";
    private double _minCount = 0.0d;
    private int _downsample = 0;
    private boolean _removeSuspiciousSamples = false;
    // Marker compared against the repetition-number metadata entry in filterOverlap.
    public static String NA = "NA";
    // Metadata file names inside the data folder; the merged file is used when private metadata is added.
    public static String METADATA_FILE = "pds.metadata";
    public static String MERGED_METADATA_FILE = "pds.metadata.extended";
    public static Integer[] SEQUENCE_WINDOWS = {13, 35};
    public static String SEX_KEY = "Gender";
    // The *_INDEX constants below are used as positions in the per-sample metadata list held in
    // _sampleVersusMetadataLookup. That list starts at the third column of the metadata table,
    // because the first two columns form the lookup key.
    public static int PATIENTID_INDEX = 0;
    public static int BODYSITE_INDEX = 1;
    public static int CENTER_INDEX = 2;
    public static int REPETITIONNUM_INDEX = 3;
    public static int VISITNUM_INDEX = 4;
    public static int TOTALVISITNUM_INDEX = 5;
    public static int SEX_INDEX = 6;
    public static int HQV13_INDEX = 7;
    public static int HQV35_INDEX = 8;
    public static int HQV69_INDEX = 9;
    public static int LQV13_INDEX = 10;
    public static int LQV35_INDEX = 11;
    public static int LQV69_INDEX = 12;
    public static int SAMPLEID_INDEX = 13;
    public static int RANDSID_INDEX = 14;
    public static int DBGAPID_INDEX = 15;
    public static int SN_INDEX = 16;
    // The *_KEY constants are the row names under which the matching metadata item is stored;
    // getPrivateMetadataVersusIndex pairs each key with its index.
    public static int START_INDEX = 17;
    public static String START_KEY = "Start";
    public static int SITE_INDEX = 18;
    public static String SITE_KEY = "Site";
    public static int CONMEDS_INDEX = 19;
    public static int GENDER_INDEX = 20;
    public static int AGEENR_INDEX = 21;
    public static String AGEENR_KEY = "Age";
    public static int ETHNIC_INDEX = 22;
    public static String ETHNIC_KEY = "Ethnie";
    public static int SYSTOLE_INDEX = 25;
    public static String SYSTOLE_KEY = "Systole";
    public static int DIASTOLE_INDEX = 26;
    public static String DIASTOLE_KEY = "Diastole";
    public static int PULSE_INDEX = 27;
    public static String PULSE_KEY = "Pulse";
    public static int WEIGHT_IN_KILOGRAMS_INDEX = 29;
    public static String WEIGHT_KEY = "Weight_in_kg";
    public static int HEIGHT_IN_CENTIMETERS_INDEX = 31;
    public static String HEIGHT_KEY = "Height_in_cm";
    public static int BODY_MASS_INDEX = 32;
    public static String BMI_KEY = "BMI";
    public static int TOBACCO_USE_INDEX = 34;
    public static int TOBACCO_WAY_OF_CONSUMPTION_INDEX = 35;
    public static int TOBACCO_CIGARETTE_FREQUENCY_INDEX = 36;
    public static String CIGARETTE_FREQUENCY_KEY = "Cigarettes_freq";
    public static int TOBACCO_CHEW_FREQUENCY_INDEX = 37;
    public static String CHEWING_FREQUENCY_KEY = "Tobacco_chewing_freq";
    public static int TEMPERATURE_CELSIUS_INDEX = 39;
    public static String TEMPERATURE_KEY = "Temperature";
    public static int VAGINA_INTROITUS_PH_INDEX = 57;
    public static String VAGINAL_PH_KEY = "Vagina_pH";
    public static int FORNIX_PH_INDEX = 58;
    public static String FORNIX_PH_KEY = "Fornix_pH";
    public static int VAGINA_IRRITATIONS_INDEX = 59;
    public static String VAGINA_IRRITATIONS_KEY = "Vagina_irritations";
    public static int ST_TYPE_INDEX = 65;
    public static int ST_TYPE_SITE_INDEX = 66;
    public static int DNA_EXTRACTION_METHOD_INDEX = 67;
    public static int STUDY_DAY_PROCESSED_INDEX = 68;
    public static int ORIGINATING_SITE_INDEX = 69;
    public static int CURRENT_SITE_INDEX = 70;
    public static int SPECIMEN_TYPE_INDEX = 71;
    public static int SPECIMEN_PARENT_INDEX = 72;
    public static int SEQPLAT_INDEX = 73;
    public static int REDUNDANT_SEQUENCE_INDEX = 74;
    // Substrings searched for in the site metadata entry by filterSite.
    public static String HOUSTON = "Houston";
    public static String ST_LOUIS = "St. Louis";
    public static String[] SUSPICIOUS_SAMPLES_OLD = {"700014824.PPS", "700033486.PPS", "700103327.may1", "700024165.may1", "700109013.may1", "700016175.PPS", "700098980.may1", "700024521.may1", "700014948.PPS", "700113154.may1", "700111499.may1", "700105600.may1", "700037714.may1", "700105824.may1", "700108828.may1", "700103651.may1", "700024486.may1", "700038452.may1", "700105755.may1", "700023015.may1", "700015464.PPS", "700023672.may1", "700099119.may1", "700099121.may1", "700108830.may1"};
    // Sample identifiers (the part of a column name before the first dot) that filterOverlap drops
    // when suspicious-sample removal is enabled. "700016176" is listed twice; this is harmless
    // because the array is converted to a set before use.
    public static String[] SUSPICIOUS_SAMPLES = {"700114872", "700114517", "700114939", "700109354", "700038500", "700024617", "700023672", "700109013", "700113154", "700113150", "700114723", "700114878", "700114448", "700113029", "700113568", "700113099", "700033486", "700114641", "700113529", "700095267", "700113642", "700111210", "700113025", "700114637", "700102163", "700114725", "700113101", "700113570", "700114450", "700114513", "700113031", "700095463", "700113644", "700015231", "700097471", "700016176", "700016176", "700035987", "700114937", "700114590", "700016096"};
    // Names of the supported technical-replica treatments (see treatTechnicalReplica).
    public static String MEDIAN = "median";
    public static String MEAN = "mean";
    public static String RANDOM_CHOICE = "random_choice";
    public static String NONE = "no_treatment";
    public static String DEFAULT_TECHNICAL_REPLICA_TREATMENT = MEDIAN;
    public static String UNCLASSIFIED = "unclassified";
    // Column annotation key set to true on columns that were merged from more than one replica.
    private static String TECHNICAL_REPLICA_MERGED_KEY = "technical_replica_merged";
    private static String REPLICA_MERGED_FLAG = "_replicaMerged";
    // Control sample columns that filterOverlap removes for sequence windows 13 and 35.
    private static String[] SPECIAL_SAMPLES = {"positive_control.PPS", "positive_control.may1", "positive_gd.PPS", "positive_mock.PPS", "water_blank.PPS", "water_blank.may1"};
    // Matches strings consisting only of digits; note that it also matches the empty string.
    private static String NUMERIC_REGEXP = "[\\d]*";
    public static String testOTU = "Bacteroides_Stool";
    public static boolean test = false;

    /**
     * Creates a parser that keeps all field defaults; in particular the data folder
     * remains the empty string until it is set.
     */
    public HMP16SRNAPatSchlossParser() {
        super();
    }

    /**
     * Creates a parser and immediately registers the given data folder.
     *
     * @param str data folder, forwarded unchanged to {@code setDataFolder}
     */
    public HMP16SRNAPatSchlossParser(String str) {
        super();
        this.setDataFolder(str);
    }

    /**
     * Lists the keys of the private metadata items, in a fixed order.
     * The start and site keys are deliberately not part of this list.
     *
     * @return a new, mutable list of metadata keys
     */
    private List<String> getPrivateMetadataToInclude() {
        List<String> keys = new ArrayList<String>();
        Collections.addAll(keys,
                AGEENR_KEY,
                ETHNIC_KEY,
                SYSTOLE_KEY,
                DIASTOLE_KEY,
                PULSE_KEY,
                WEIGHT_KEY,
                HEIGHT_KEY,
                BMI_KEY,
                CIGARETTE_FREQUENCY_KEY,
                CHEWING_FREQUENCY_KEY,
                TEMPERATURE_KEY,
                VAGINAL_PH_KEY,
                FORNIX_PH_KEY,
                VAGINA_IRRITATIONS_KEY);
        return keys;
    }

    /**
     * Pairs every private metadata key with the position of its value in the
     * per-sample metadata list.
     *
     * @return a new, mutable map from metadata key to list index
     */
    private Map<String, Integer> getPrivateMetadataVersusIndex() {
        Map<String, Integer> indexByKey = new HashMap<String, Integer>();
        indexByKey.put(START_KEY, START_INDEX);
        indexByKey.put(SITE_KEY, SITE_INDEX);
        indexByKey.put(AGEENR_KEY, AGEENR_INDEX);
        indexByKey.put(ETHNIC_KEY, ETHNIC_INDEX);
        indexByKey.put(SYSTOLE_KEY, SYSTOLE_INDEX);
        indexByKey.put(DIASTOLE_KEY, DIASTOLE_INDEX);
        indexByKey.put(PULSE_KEY, PULSE_INDEX);
        indexByKey.put(WEIGHT_KEY, WEIGHT_IN_KILOGRAMS_INDEX);
        indexByKey.put(HEIGHT_KEY, HEIGHT_IN_CENTIMETERS_INDEX);
        indexByKey.put(BMI_KEY, BODY_MASS_INDEX);
        indexByKey.put(CIGARETTE_FREQUENCY_KEY, TOBACCO_CIGARETTE_FREQUENCY_INDEX);
        indexByKey.put(CHEWING_FREQUENCY_KEY, TOBACCO_CHEW_FREQUENCY_INDEX);
        indexByKey.put(TEMPERATURE_KEY, TEMPERATURE_CELSIUS_INDEX);
        indexByKey.put(VAGINAL_PH_KEY, VAGINA_INTROITUS_PH_INDEX);
        indexByKey.put(FORNIX_PH_KEY, FORNIX_PH_INDEX);
        indexByKey.put(VAGINA_IRRITATIONS_KEY, VAGINA_IRRITATIONS_INDEX);
        return indexByKey;
    }

    /**
     * Imports the tab-separated metadata table from the data folder and fills the
     * sample-versus-metadata lookup.
     *
     * <p>The merged metadata file is read when private metadata is requested, the plain
     * metadata file otherwise. For every row, the first two cells are joined with a dot
     * to form the lookup key; all remaining cells become the value list. Rows whose first
     * or second cell is empty are not registered.
     *
     * <p>Read failures are reported through the logger and leave the lookup with whatever
     * had been registered before the failure.
     */
    private void loadMetaData() {
        String fileName = isAddPrivateMetadata() ? MERGED_METADATA_FILE : METADATA_FILE;
        String location = getDataFolder() + File.separator + fileName;
        this._logger.info("Loading metadata from file " + location);
        try {
            this._metadataMatrix = MatrixFactory.importFromFile(FileFormat.CSV, location, new Object[]{"\t"});
            this._logger.info("Parsed metadata for " + this._metadataMatrix.getSize()[0] + " samples.");
            this._logger.info("rows: " + this._metadataMatrix.getSize(0) + ", columns: " + this._metadataMatrix.getSize(1));
            String idPart = "";
            String suffixPart = "";
            List<String> rowValues = new ArrayList<String>();
            // NOTE(review): this relies on allCoordinates() yielding the cells row by row,
            // starting each row at column 0 — confirm against the matrix library.
            for (long[] coordinate : this._metadataMatrix.allCoordinates()) {
                int row = (int) coordinate[0];
                int column = (int) coordinate[1];
                String cell = this._metadataMatrix.getAsString(coordinate);
                if (column == 0) {
                    // A new row starts: flush the row collected so far.
                    if (row > 0 && !idPart.isEmpty() && !suffixPart.isEmpty()) {
                        this._sampleVersusMetadataLookup.put(idPart + "." + suffixPart, rowValues);
                    }
                    idPart = cell;
                    rowValues = new ArrayList<String>();
                } else if (column == 1) {
                    suffixPart = cell;
                } else {
                    rowValues.add(cell);
                }
            }
            // Flush the last row under the same condition as all other rows, so that an empty
            // table or an incomplete last row does not register a meaningless "." key.
            if (!idPart.isEmpty() && !suffixPart.isEmpty()) {
                this._sampleVersusMetadataLookup.put(idPart + "." + suffixPart, rowValues);
            }
        } catch (IOException e) {
            this._logger.error("Could not read metadata file " + location, e);
        } catch (MatrixException e2) {
            this._logger.error("Could not parse metadata file " + location, e2);
        }
    }

    /**
     * Builds a new output matrix that holds all rows of the current output matrix followed
     * by one row per entry of the temporary metadata matrix, and installs it as output matrix.
     *
     * <p>Metadata row names have the Colt default column separator replaced by the joiner
     * constant; names matching the numeric pattern are prefixed with "id_" instead. Every
     * appended row is annotated as a feature row.
     */
    private void mergeCountAndMetadataMatrix() {
        be.ac.vub.bsb.cooccurrence.measures.Matrix counts = getOutputMatrix();
        int countRows = counts.getMatrix().rows();
        be.ac.vub.bsb.cooccurrence.measures.Matrix combined = new be.ac.vub.bsb.cooccurrence.measures.Matrix(countRows + this._metadataTempMatrix.size(), counts.getMatrix().columns());
        combined.setColNames(counts.getColNames());
        counts.copyMetadataToTargetMatrix(combined);

        // Copy the count rows unchanged.
        for (int row = 0; row < countRows; row++) {
            combined.setRowName(row, counts.getRowName(row));
            combined.setRow(row, counts.getMatrix().viewRow(row).toArray());
        }

        // Append the metadata rows in the key order of the sorted map.
        int targetRow = countRows;
        for (Map.Entry<String, double[]> metadataRow : this._metadataTempMatrix.entrySet()) {
            String key = metadataRow.getKey();
            String rowName;
            if (key.matches(NUMERIC_REGEXP)) {
                rowName = "id_" + key;
            } else {
                rowName = key.replace(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR, PathwayinferenceConstants.REACTION_SUBREACTION_JOINER);
            }
            combined.setRowName(targetRow, rowName);
            combined.setRow(targetRow, metadataRow.getValue());
            combined.setRowMetaAnnotation(targetRow, FeatureMatrixLoader.IS_FEATURE_ATTRIB, FeatureMatrixLoader.FEATURE_PRESENT_VALUE);
            targetRow++;
        }
        setOutputMatrix(combined);
    }

    /**
     * Collects the names of all rows whose entries are all zero, logs them, and replaces
     * the output matrix by the sub-matrix without these rows.
     */
    private void filterZeroLines() {
        this._zeroLines = new ArrayList();
        be.ac.vub.bsb.cooccurrence.measures.Matrix counts = getOutputMatrix();
        int row = 0;
        while (row < counts.getMatrix().rows()) {
            boolean allZero = VectorToolsProvider.allEntriesAreZero(counts.getMatrix().viewRow(row));
            if (allZero) {
                this._zeroLines.add(counts.getRowName(row));
            }
            row++;
        }
        this._logger.info("Skipping taxa: " + this._zeroLines + ", because they have zero counts in all samples.");
        setOutputMatrix(MatrixToolsProvider.getSubmatrixWithoutRows(counts, this._zeroLines));
    }

    /**
     * Removes columns from the output matrix depending on the site metadata entry.
     *
     * <p>Columns without metadata are always removed. With site filter 1, columns whose site
     * entry contains the Houston marker are removed; with site filter 2, those containing the
     * St. Louis marker. With any positive site filter, columns whose site entry contains
     * neither marker are removed as well.
     */
    private void filterSite() {
        ArrayList<Integer> columnsToDrop = new ArrayList<Integer>();
        int missingMetadata = 0;
        be.ac.vub.bsb.cooccurrence.measures.Matrix counts = getOutputMatrix();
        int siteMode = getFilterSite();
        for (int column = 0; column < counts.getMatrix().columns(); column++) {
            String colName = counts.getColName(column);
            if (!this._sampleVersusMetadataLookup.containsKey(colName)) {
                this._logger.warn("Sample identifier " + colName + " not contained in metadata map! It is removed as well to prevent possible overlap.");
                columnsToDrop.add(column);
                missingMetadata++;
                continue;
            }
            if (siteMode <= 0) {
                // Site filtering is switched off: the site entry is not consulted at all.
                continue;
            }
            List<String> metadata = this._sampleVersusMetadataLookup.get(colName);
            String site = metadata.get(SITE_INDEX);
            boolean fromHouston = site.contains(HOUSTON);
            boolean fromStLouis = site.contains(ST_LOUIS);
            if ((siteMode == 1 && fromHouston) || (siteMode == 2 && fromStLouis)) {
                columnsToDrop.add(column);
                this._numOverlappingSamples++;
            } else if (!fromHouston && !fromStLouis) {
                this._logger.warn("Sample identifier " + colName + " neither contains Houston nor St. Louis! It is removed as well to prevent possible overlap.");
                this._logger.info("Metadata: " + metadata);
                this._logger.info("Entry in metadata: " + site);
                columnsToDrop.add(column);
            }
        }
        this._logger.info("Number of site-specific columns to remove: " + columnsToDrop.size());
        this._logger.info("Indices of site-specific columns to remove: " + columnsToDrop);
        this._logger.info("Number of samples not found in metadata: " + missingMetadata);
        setOutputMatrix(MatrixToolsProvider.getSubMatrixWithoutColIndices(counts, columnsToDrop));
    }

    /**
     * Removes overlapping, suspicious and control columns from the output matrix.
     *
     * <p>A column is removed when
     * <ul>
     *   <li>suspicious-sample removal is enabled and the part of its name before the first
     *       dot is listed in {@code SUSPICIOUS_SAMPLES},</li>
     *   <li>its repetition-number metadata entry equals {@code NA} (this also increments the
     *       overlapping-sample counter), or</li>
     *   <li>the sequence window is 13 or 35 and the column is one of the control samples.</li>
     * </ul>
     * Columns without metadata are only reported. Each column index is queued at most once,
     * even if several of the criteria apply to it, so that the logged count is accurate and
     * the sub-matrix helper never receives duplicate indices.
     */
    private void filterOverlap() {
        this._logger.info("Filter overlap");
        be.ac.vub.bsb.cooccurrence.measures.Matrix counts = getOutputMatrix();
        ArrayList<Integer> columnsToRemove = new ArrayList<Integer>();
        Set<String> suspicious = ArrayTools.arrayToSet(SUSPICIOUS_SAMPLES);
        for (int column = 0; column < counts.getMatrix().columns(); column++) {
            Integer boxedColumn = Integer.valueOf(column);
            String colName = counts.getColName(column);
            // Take the text before the first dot; indexOf also copes with names made of dots only.
            int dot = colName.indexOf('.');
            String sampleId = dot >= 0 ? colName.substring(0, dot) : colName;
            if (isRemoveSuspiciousSamples() && suspicious.contains(sampleId)) {
                this._logger.info("Removing suspicious sample: " + sampleId);
                if (!columnsToRemove.contains(boxedColumn)) {
                    columnsToRemove.add(boxedColumn);
                }
            }
            if (!this._sampleVersusMetadataLookup.containsKey(colName)) {
                this._logger.warn("Sample identifier " + colName + " not contained in metadata map!");
            } else if (this._sampleVersusMetadataLookup.get(colName).get(REPETITIONNUM_INDEX).equals(NA)) {
                if (!columnsToRemove.contains(boxedColumn)) {
                    columnsToRemove.add(boxedColumn);
                }
                this._numOverlappingSamples++;
            }
        }
        this._logger.info("Number of overlapping columns to remove: " + columnsToRemove.size());
        this._logger.info("Indices of overlapping columns to remove: " + columnsToRemove.toString());
        if (getSeqWindow() == 13 || getSeqWindow() == 35) {
            for (String control : SPECIAL_SAMPLES) {
                if (counts.hasColName(control)) {
                    Integer controlColumn = Integer.valueOf(counts.getIndexOfColName(control));
                    if (!columnsToRemove.contains(controlColumn)) {
                        columnsToRemove.add(controlColumn);
                    }
                    this._logger.info("Removing control sample: " + control);
                }
            }
        }
        setOutputMatrix(MatrixToolsProvider.getSubMatrixWithoutColIndices(counts, columnsToRemove));
    }

    /**
     * Collapses technical replicates into one column per patient/body-site group and
     * installs the collapsed matrix as output matrix. Does nothing for the {@code NONE} treatment.
     *
     * <p>Columns are grouped by patient identifier and body site taken from the metadata;
     * columns without metadata are reported and left out. A group with a single column, or
     * any group under the {@code RANDOM_CHOICE} treatment, is represented by the first column
     * the group's set iterator returns. Under {@code MEDIAN} or {@code MEAN} each row value is
     * the median or mean over the group's columns. The resulting column is named after that
     * first column and, for groups with more than one member, annotated as merged.
     */
    private void treatTechnicalReplica() {
        String treatment = getReplicaTreatment();
        if (treatment.equals(NONE)) {
            return;
        }
        be.ac.vub.bsb.cooccurrence.measures.Matrix source = getOutputMatrix();

        // Step 1: group the sample columns by patient and body site.
        for (String sample : source.getColNames()) {
            if (!this._sampleVersusMetadataLookup.containsKey(sample)) {
                this._logger.warn("Sample identifier " + sample + " not contained in metadata map!");
                continue;
            }
            List<String> metadata = this._sampleVersusMetadataLookup.get(sample);
            String groupKey = metadata.get(PATIENTID_INDEX) + HelpFormatter.DEFAULT_OPT_PREFIX + metadata.get(BODYSITE_INDEX);
            Set<String> group = this._technicalReplica.get(groupKey);
            if (group == null) {
                group = new HashSet<String>();
                this._technicalReplica.put(groupKey, group);
            }
            group.add(sample);
        }

        // Step 2: write one column per group.
        int rowCount = source.getMatrix().rows();
        be.ac.vub.bsb.cooccurrence.measures.Matrix collapsed = new be.ac.vub.bsb.cooccurrence.measures.Matrix(rowCount, this._technicalReplica.size());
        collapsed.setRowNames(source.getRowNames());
        boolean useMedian = treatment.equals(MEDIAN);
        boolean useMean = treatment.equals(MEAN);
        // Declared outside the loop on purpose: if no branch below applies, the values of the
        // previously written column are written again.
        DoubleMatrix1D columnValues = new DenseDoubleMatrix1D(rowCount);
        int targetColumn = 0;
        for (Set<String> group : this._technicalReplica.values()) {
            String representative = group.iterator().next();
            if (group.size() == 1 || treatment.equals(RANDOM_CHOICE)) {
                columnValues = source.getMatrix().viewColumn(source.getIndexOfColName(representative));
            } else if (useMedian || useMean) {
                columnValues = new DenseDoubleMatrix1D(rowCount);
                for (int row = 0; row < rowCount; row++) {
                    DenseDoubleMatrix1D replicaValues = new DenseDoubleMatrix1D(group.size());
                    int slot = 0;
                    for (String member : group) {
                        replicaValues.set(slot, source.getMatrix().get(row, source.getIndexOfColName(member)));
                        slot++;
                    }
                    if (useMedian) {
                        columnValues.set(row, StatsProvider.getMedian(replicaValues, true));
                    } else {
                        columnValues.set(row, StatsProvider.getMean(replicaValues, true));
                    }
                }
            }
            collapsed.setColName(targetColumn, representative);
            collapsed.setColumn(targetColumn, columnValues.toArray());
            if (group.size() > 1) {
                collapsed.setColumnMetaAnnotation(targetColumn, TECHNICAL_REPLICA_MERGED_KEY, true);
            }
            targetColumn++;
        }
        setOutputMatrix(collapsed);
    }

    /**
     * Placeholder: both arguments are ignored and a fresh, empty matrix is returned.
     *
     * @param matrix  unused
     * @param matrix2 unused
     * @return a newly created empty matrix
     */
    private be.ac.vub.bsb.cooccurrence.measures.Matrix mergeTwoTaxonCountMatrices(be.ac.vub.bsb.cooccurrence.measures.Matrix matrix, be.ac.vub.bsb.cooccurrence.measures.Matrix matrix2) {
        be.ac.vub.bsb.cooccurrence.measures.Matrix emptyResult = new be.ac.vub.bsb.cooccurrence.measures.Matrix();
        return emptyResult;
    }

    /**
     * Finds the first sample name, in the set's iteration order, that contains the given text.
     *
     * @param set candidate sample names
     * @param str text to look for inside a sample name
     * @return the first matching sample name, or the empty string if none matches
     */
    private String getSampleGivenBodysite(Set<String> set, String str) {
        Iterator<String> candidates = set.iterator();
        while (candidates.hasNext()) {
            String candidate = candidates.next();
            if (candidate.indexOf(str) >= 0) {
                return candidate;
            }
        }
        return "";
    }

    /**
     * Collapses the body-site specific output matrix into one column per patient.
     *
     * <p>Each column name is split at the joiner; its first two parts, joined by a dot, form
     * the metadata key from which the patient identifier is read. Under the {@code NONE}
     * replica treatment the visit number is appended to the patient identifier. For every
     * patient and every row, the value is taken from the patient's column whose name contains
     * the row's body site (the second part of the row name); if the patient has no such
     * column the cell is set to NaN. Result columns are named "id_" plus the patient
     * identifier and are ordered by patient identifier.
     *
     * <p>The column lookup is cached while consecutive rows share a body site. The cache is
     * reset for every patient, so a patient never inherits the column of the previous one.
     */
    private void mergeColumnsPatientWiseInBodysiteSpecificMatrixWithNaNs() {
        be.ac.vub.bsb.cooccurrence.measures.Matrix source = getOutputMatrix();

        // Step 1: collect the column names belonging to each patient.
        Map<String, Set<String>> columnsByPatient = new TreeMap<String, Set<String>>();
        for (int column = 0; column < source.getMatrix().columns(); column++) {
            String colName = source.getColName(column);
            String[] nameParts = colName.split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER);
            String sampleKey = nameParts[0] + "." + nameParts[1];
            List<String> metadata = this._sampleVersusMetadataLookup.get(sampleKey);
            String patientId = metadata.get(PATIENTID_INDEX);
            if (getReplicaTreatment().equals(NONE)) {
                patientId = patientId + PathwayinferenceConstants.REACTION_SUBREACTION_JOINER + metadata.get(VISITNUM_INDEX);
                this._logger.info("updated patient id: " + patientId);
            }
            this._idVersusSampleId.put(patientId, sampleKey);
            Set<String> patientColumns = columnsByPatient.get(patientId);
            if (patientColumns == null) {
                patientColumns = new HashSet<String>();
                columnsByPatient.put(patientId, patientColumns);
            }
            patientColumns.add(colName);
        }

        // Step 2: fill one result column per patient.
        be.ac.vub.bsb.cooccurrence.measures.Matrix merged = new be.ac.vub.bsb.cooccurrence.measures.Matrix(source.getMatrix().rows(), columnsByPatient.size());
        int targetColumn = 0;
        for (Map.Entry<String, Set<String>> patient : columnsByPatient.entrySet()) {
            // Reset the cache: the previous patient's body site and column must not leak over.
            String cachedBodysite = null;
            int sourceColumn = -1;
            for (int row = 0; row < source.getMatrix().rows(); row++) {
                String bodysite = source.getRowName(row).split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER)[1];
                if (!bodysite.equals(cachedBodysite)) {
                    String sample = getSampleGivenBodysite(patient.getValue(), bodysite);
                    sourceColumn = sample.equals("") ? -1 : source.getIndexOfColName(sample);
                    cachedBodysite = bodysite;
                }
                double value = sourceColumn < 0 ? Double.NaN : source.getMatrix().get(row, sourceColumn);
                merged.getMatrix().set(row, targetColumn, value);
                if (targetColumn == 0) {
                    // Row names only need to be copied once.
                    merged.setRowName(row, source.getRowName(row));
                }
            }
            merged.setColName(targetColumn, "id_" + patient.getKey());
            targetColumn++;
        }
        setOutputMatrix(merged);
    }

    /**
     * Expands every row of the output matrix into one row per body site.
     *
     * <p>The body site of a column is the third part of its name when split at the joiner.
     * For each body site (sorted) and each distinct row name (sorted) a row named
     * row name + joiner + body site is created. It carries the original value in columns
     * of that body site and NaN in all other columns. Column names and matrix metadata are
     * carried over, and the expanded matrix becomes the output matrix.
     */
    private void makeBodysiteSpecificRowsAllowingNaNs() {
        be.ac.vub.bsb.cooccurrence.measures.Matrix source = getOutputMatrix();

        // Distinct row names and body sites, both in natural order.
        Set<String> taxa = new TreeSet<String>(ArrayTools.arrayToSet(source.getRowNames()));
        Set<String> bodysites = new TreeSet<String>();
        for (String colName : source.getColNames()) {
            bodysites.add(colName.split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER)[2]);
        }

        List<String> expandedNames = new ArrayList<String>();
        for (String bodysite : bodysites) {
            for (String taxon : taxa) {
                if (taxon.isEmpty()) {
                    this._logger.error("Empty taxon name for body site " + bodysite + "!");
                    continue;
                }
                expandedNames.add(taxon + PathwayinferenceConstants.REACTION_SUBREACTION_JOINER + bodysite);
            }
        }

        int columnCount = source.getMatrix().columns();
        be.ac.vub.bsb.cooccurrence.measures.Matrix expanded = new be.ac.vub.bsb.cooccurrence.measures.Matrix(expandedNames.size(), columnCount);
        if (!expandedNames.isEmpty()) {
            // Body site of every column, looked up once instead of once per row.
            String[] columnBodysites = new String[columnCount];
            for (int column = 0; column < columnCount; column++) {
                columnBodysites[column] = source.getColName(column).split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER)[2];
            }
            for (int row = 0; row < expandedNames.size(); row++) {
                String expandedName = expandedNames.get(row);
                String[] nameParts = expandedName.split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER);
                String rowBodysite = nameParts[1];
                int sourceRow = source.getIndexOfRowName(nameParts[0]);
                double[] values = new double[columnCount];
                for (int column = 0; column < columnCount; column++) {
                    values[column] = rowBodysite.equals(columnBodysites[column])
                            ? source.getMatrix().get(sourceRow, column)
                            : Double.NaN;
                }
                expanded.setRowName(row, expandedName);
                expanded.setRow(row, values);
            }
        }
        expanded.setColNames(source.getColNames());
        source.copyMetadataToTargetMatrix(expanded);
        setOutputMatrix(expanded);
    }

    /**
     * Rebuilds the output matrix so that each column is one patient and each row is a
     * combination of an original row name and a body site.
     *
     * <p>Column names are split at the joiner; the first two parts, joined by a dot, form the
     * metadata key used to read the patient identifier, the third part is the body site.
     * Patients reported by {@code filterIncompletePatientIdentifiersFromMap} are left out.
     *
     * @param map passed unchanged to {@code filterIncompletePatientIdentifiersFromMap}, which
     *            yields the patient identifiers to skip
     */
    private void makeBodysiteSpecificRows(Map<String, Object> map) {
        // arrayList: accepted patient identifiers in first-seen order
        ArrayList<String> arrayList = new ArrayList();
        // arrayList2: distinct body sites (sorted further below)
        ArrayList<String> arrayList2 = new ArrayList();
        // arrayList3: row names of the new matrix
        ArrayList arrayList3 = new ArrayList();
        // hashMap: patient identifier -> names of the columns belonging to that patient
        HashMap hashMap = new HashMap();
        new HashSet();
        Set<String> filterIncompletePatientIdentifiersFromMap = filterIncompletePatientIdentifiersFromMap(map);
        for (String str : getOutputMatrix().getColNames()) {
            String str2 = str.split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER)[0];
            String str3 = str.split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER)[1];
            String str4 = str.split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER)[2];
            String str5 = this._sampleVersusMetadataLookup.get(String.valueOf(str2) + "." + str3).get(PATIENTID_INDEX);
            if (filterIncompletePatientIdentifiersFromMap.contains(str5)) {
                this._logger.info("Patient identifier " + str5 + " is skipped, because not all of its associated body sites have been sampled.");
            } else {
                if (hashMap.containsKey(str5)) {
                    ((Set) hashMap.get(str5)).add(str);
                } else {
                    HashSet hashSet = new HashSet();
                    hashSet.add(str);
                    hashMap.put(str5, hashSet);
                }
                if (!arrayList.contains(str5)) {
                    arrayList.add(str5);
                }
            }
            // The body site is recorded even when the patient itself is skipped.
            if (!arrayList2.contains(str4)) {
                arrayList2.add(str4);
            }
        }
        this._logger.info("Number of patient identifiers where all body sites were sampled: " + arrayList.size());
        this._logger.info("Number of unique body sites: " + arrayList2.size());
        this._logger.info("Unique body sites: " + arrayList2.toString());
        Collections.sort(arrayList2);
        // New row names: for each body site (sorted), every non-empty original row name + joiner + body site.
        for (String str6 : arrayList2) {
            for (String str7 : getOutputMatrix().getRowNames()) {
                if (str7.isEmpty()) {
                    this._logger.error("Empty row name for body site " + str6 + "!");
                } else {
                    arrayList3.add(String.valueOf(str7) + PathwayinferenceConstants.REACTION_SUBREACTION_JOINER + str6);
                }
            }
        }
        // treeMap: patient identifier -> values of that patient's new column, sorted by identifier
        TreeMap treeMap = new TreeMap();
        String str8 = "";
        for (String str9 : arrayList) {
            this._logger.info("Processing patient identifier: " + str9);
            double[] dArr = new double[arrayList3.size()];
            // i is the write position in dArr; it advances by one per original row for every body site.
            // NOTE(review): dArr has one slot per NON-empty row name per body site, while i advances
            // for every row; an empty row name would therefore overrun dArr — confirm row names are never empty.
            int i = 0;
            Set<String> set = (Set) hashMap.get(str9);
            for (String str10 : arrayList2) {
                // Look for a column of this patient whose name contains the body site;
                // if several match, the last one in the set's iteration order is used.
                boolean z = false;
                for (String str11 : set) {
                    if (str11.contains(str10)) {
                        str8 = str11;
                        z = true;
                    }
                }
                if (z) {
                    int indexOfColName = getOutputMatrix().getIndexOfColName(str8);
                    for (int i2 = 0; i2 < getOutputMatrix().getMatrix().rows(); i2++) {
                        dArr[i] = getOutputMatrix().getMatrix().get(i2, indexOfColName);
                        i++;
                    }
                } else if (str10.equals(HMP16SRNAPatSchlossFullTableAnalyser.VAGINAL_INTROITUS) || str10.equals(HMP16SRNAPatSchlossFullTableAnalyser.MID_VAGINA) || str10.equals(HMP16SRNAPatSchlossFullTableAnalyser.POSTERIOR_FORNIX)) {
                    // The three vaginal body sites are filled with zeros instead of NaN when missing
                    // (presumably because they cannot be sampled for every patient — TODO confirm).
                    for (int i3 = 0; i3 < getOutputMatrix().getMatrix().rows(); i3++) {
                        dArr[i] = 0.0d;
                        i++;
                    }
                } else {
                    this._logger.info("Body site " + str10 + " is missing for patient identifier " + str9 + "!");
                    for (int i4 = 0; i4 < getOutputMatrix().getMatrix().rows(); i4++) {
                        dArr[i] = Double.NaN;
                        i++;
                    }
                }
            }
            treeMap.put(str9, dArr);
        }
        be.ac.vub.bsb.cooccurrence.measures.Matrix matrix = new be.ac.vub.bsb.cooccurrence.measures.Matrix(arrayList3.size(), arrayList.size());
        matrix.setRowNames(ArrayTools.m243toArray((List<String>) arrayList3));
        // Columns are written in the sorted key order of treeMap, not in first-seen order.
        int i5 = 0;
        for (String str12 : treeMap.keySet()) {
            matrix.setColName(i5, str12);
            matrix.setColumn(i5, (double[]) treeMap.get(str12));
            i5++;
        }
        setOutputMatrix(matrix);
    }

    /**
     * Registers one row per metadata item in {@code _metadataTempMatrix}. Every row is a
     * {@code double[]} with one entry per column of the current output matrix.
     *
     * <p>Rows created:
     * <ul>
     *   <li>unless body-site specific taxa are requested: one row per distinct metadata value found
     *       at {@code PATIENTID_INDEX} or {@code BODYSITE_INDEX} (and at {@code CENTER_INDEX} when the
     *       replica treatment equals {@code NONE}) in any entry of the sample metadata lookup;</li>
     *   <li>always: the {@code SEX_KEY} row;</li>
     *   <li>when private metadata is added: one row per private metadata item, the {@code START_KEY}
     *       row, the {@code SITE_KEY} row (only if no site is filtered) and one row per medication
     *       name returned by {@code HMP16SRNAPatSchlossPrivateMetadataMerger.codeVersusMedication()}.</li>
     * </ul>
     */
    private void initMetadata() {
        // The output matrix is not modified in this method, so its width is read once.
        final int columnCount = getOutputMatrix().getMatrix().columns();
        if (!isBodysiteSpecTaxa()) {
            // Distinct metadata values, kept in first-seen order.
            List<String> distinctValues = new ArrayList<String>();
            for (List<String> sampleMetadata : this._sampleVersusMetadataLookup.values()) {
                for (int i = 0; i < sampleMetadata.size(); i++) {
                    boolean selectedField = i == PATIENTID_INDEX || i == BODYSITE_INDEX || (getReplicaTreatment().equals(NONE) && i == CENTER_INDEX);
                    if (selectedField && !distinctValues.contains(sampleMetadata.get(i))) {
                        distinctValues.add(sampleMetadata.get(i));
                    }
                }
            }
            for (String value : distinctValues) {
                this._metadataTempMatrix.put(value, new double[columnCount]);
            }
        }
        this._metadataTempMatrix.put(SEX_KEY, new double[columnCount]);
        if (isAddPrivateMetadata()) {
            for (String privateItem : getPrivateMetadataToInclude()) {
                this._metadataTempMatrix.put(privateItem, new double[columnCount]);
            }
            this._metadataTempMatrix.put(START_KEY, new double[columnCount]);
            // A site row is only needed when samples of both sites are kept.
            if (getFilterSite() == 0) {
                this._metadataTempMatrix.put(SITE_KEY, new double[columnCount]);
            }
            Map<Integer, String> codeVersusMedication = HMP16SRNAPatSchlossPrivateMetadataMerger.codeVersusMedication();
            this._logger.info(codeVersusMedication.toString());
            // Codes 1 .. size-1 are looked up consecutively; code 99 is handled separately below.
            // NOTE(review): assumes the map holds exactly the keys 1..size-1 plus 99 - confirm in the merger class.
            for (int code = 1; code < codeVersusMedication.size(); code++) {
                double[] medicationRow = new double[columnCount];
                ArrayTools.initializeWithGivenValue(medicationRow, 0.0d);
                this._logger.debug("code: " + code);
                this._logger.debug("medication: " + codeVersusMedication.get(Integer.valueOf(code)));
                this._metadataTempMatrix.put(codeVersusMedication.get(Integer.valueOf(code)), medicationRow);
            }
            double[] code99Row = new double[columnCount];
            ArrayTools.initializeWithGivenValue(code99Row, 0.0d);
            this._metadataTempMatrix.put(codeVersusMedication.get(99), code99Row);
            this._logger.info(codeVersusMedication.get(99));
        }
    }

    /**
     * Fills entry {@code i} of the rows in {@code _metadataTempMatrix} from the metadata of one sample.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Unless body-site specific taxa are requested, every registered row gets 1 if its key occurs
     *       in {@code list}; otherwise NaN for the numeric keys (temperature, weight, height, BMI, blood
     *       pressure, pulse, age, pH) and 0 for all other keys.</li>
     *   <li>The {@code SEX_KEY} entry is parsed from {@code GENDER_INDEX} (NaN if empty) when private
     *       metadata is added; otherwise it is 1 if {@code list} contains {@code "female"}, else 0.</li>
     *   <li>With private metadata only: numeric private items, the site flag, the study start expressed
     *       in days since the reference date, and the medication flags are set.</li>
     * </ol>
     *
     * @param list metadata values of the sample
     * @param i    index of the sample's column in the output matrix
     */
    private void updateMetadata(List<String> list, int i) {
        if (!isBodysiteSpecTaxa()) {
            for (String str : this._metadataTempMatrix.keySet()) {
                if (list.contains(str)) {
                    this._metadataTempMatrix.get(str)[i] = 1.0d;
                } else if (str.equals(TEMPERATURE_KEY) || str.equals(WEIGHT_KEY) || str.equals(HEIGHT_KEY) || str.equals(BMI_KEY) || str.equals(SYSTOLE_KEY) || str.equals(DIASTOLE_KEY) || str.equals(PULSE_KEY) || str.equals(AGEENR_KEY) || str.equals(VAGINAL_PH_KEY) || str.equals(FORNIX_PH_KEY)) {
                    // An absent numeric metadatum is missing, not zero.
                    this._logger.info("Setting value to NaN for absent metadatum " + str);
                    this._metadataTempMatrix.get(str)[i] = Double.NaN;
                } else {
                    this._metadataTempMatrix.get(str)[i] = 0.0d;
                }
            }
        }
        if (isAddPrivateMetadata()) {
            if (list.get(GENDER_INDEX).isEmpty()) {
                this._metadataTempMatrix.get(SEX_KEY)[i] = Double.NaN;
            } else {
                this._metadataTempMatrix.get(SEX_KEY)[i] = Double.parseDouble(list.get(GENDER_INDEX));
            }
        } else if (list.contains("female")) {
            this._metadataTempMatrix.get(SEX_KEY)[i] = 1.0d;
        } else {
            this._metadataTempMatrix.get(SEX_KEY)[i] = 0.0d;
        }
        if (isAddPrivateMetadata()) {
            Map<String, Integer> privateMetadataVersusIndex = getPrivateMetadataVersusIndex();
            for (String str2 : getPrivateMetadataToInclude()) {
                try {
                    this._metadataTempMatrix.get(str2)[i] = Double.parseDouble(list.get(privateMetadataVersusIndex.get(str2).intValue()));
                } catch (NumberFormatException e) {
                    // Report through the class logger instead of dumping the stack trace to stderr.
                    this._logger.warn("Could not parse value of private metadatum " + str2 + " for column index " + i + "; setting it to NaN.", e);
                    this._metadataTempMatrix.get(str2)[i] = Double.NaN;
                }
            }
            // Site flag: 1 for HOUSTON, 0 for ST_LOUIS; a sample of a site that was filtered out is an error.
            if (list.contains(HOUSTON)) {
                if (getFilterSite() == 1) {
                    this._logger.error("Column " + i + " (" + getOutputMatrix().getColName(i) + ") has " + HOUSTON + " as site, although this site was supposed to be removed!");
                } else if (getFilterSite() == 0) {
                    this._metadataTempMatrix.get(SITE_KEY)[i] = 1.0d;
                }
            } else if (!list.contains(ST_LOUIS)) {
                this._logger.error("Neither " + HOUSTON + " nor " + ST_LOUIS + " set as Site for column index " + i + "!");
            } else if (getFilterSite() == 2) {
                this._logger.error("Column " + i + " (" + getOutputMatrix().getColName(i) + ") has " + ST_LOUIS + " as site, although this site was supposed to be removed!");
            } else if (getFilterSite() == 0) {
                this._metadataTempMatrix.get(SITE_KEY)[i] = 0.0d;
            }
            String start = list.get(START_INDEX);
            if (start.isEmpty()) {
                this._metadataTempMatrix.get(START_KEY)[i] = Double.NaN;
            } else {
                // Reference date: 1 January 2008 (Calendar months are zero-based).
                Calendar reference = Calendar.getInstance();
                reference.set(2008, 0, 1);
                // The start value has the form <quarter><joiner><year>, e.g. Q1 followed by the year.
                String[] startParts = start.split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER);
                int year = Integer.parseInt(startParts[1]);
                String quarter = startParts[0];
                // Each quarter is represented by the first day of its last month; unknown labels map to January.
                int month = 0;
                if (quarter.equals("Q1")) {
                    month = 2;
                } else if (quarter.equals("Q2")) {
                    month = 5;
                } else if (quarter.equals("Q3")) {
                    month = 8;
                } else if (quarter.equals("Q4")) {
                    month = 11;
                }
                Calendar startDate = Calendar.getInstance();
                startDate.set(year, month, 1);
                // 8.64E7 ms = one day. Both calendars keep the current time of day, so the difference is
                // only approximately a whole number of days and is rounded to zero decimals.
                this._metadataTempMatrix.get(START_KEY)[i] = DiverseTools.round(Long.valueOf(startDate.getTimeInMillis() - reference.getTimeInMillis()).doubleValue() / 8.64E7d, 0);
            }
            Map<Integer, String> codeVersusMedication = HMP16SRNAPatSchlossPrivateMetadataMerger.codeVersusMedication();
            String medications = list.get(CONMEDS_INDEX);
            if (medications.isEmpty()) {
                // No medication information at all: every medication flag is unknown.
                Iterator<Integer> it = codeVersusMedication.keySet().iterator();
                while (it.hasNext()) {
                    this._metadataTempMatrix.get(codeVersusMedication.get(it.next()))[i] = Double.NaN;
                }
                return;
            }
            if (!medications.contains("|")) {
                // Single entry of the form <code>:<text>.
                this._metadataTempMatrix.get(codeVersusMedication.get(Integer.valueOf(Integer.parseInt(medications.split(":")[0]))))[i] = 1.0d;
                return;
            }
            // Several entries separated by '|', each of the form <code>:<text>.
            for (String entry : medications.split("\\|")) {
                if (entry.contains(":")) {
                    String code = entry.split("\\:")[0];
                    if (code.isEmpty()) {
                        this._logger.warn("Medical description " + medications + " contains an empty entry.");
                    } else {
                        this._metadataTempMatrix.get(codeVersusMedication.get(Integer.valueOf(Integer.parseInt(code))))[i] = 1.0d;
                    }
                } else {
                    this._logger.warn("Medical description " + medications + " contains an entry with missing delimiter.");
                }
            }
        }
    }

    /**
     * Determines which patient identifiers have not been sampled at all required body sites.
     *
     * <p>A patient is reported as incomplete when its map value is a single {@code String}, or when
     * it is a collection with fewer than 18 entries if it contains a vaginal body site (vaginal
     * introitus, mid vagina or posterior fornix), or fewer than 15 entries otherwise. Values of any
     * other type are ignored. Both the complete and the incomplete identifiers are logged.
     *
     * @param map patient identifier versus the body site(s) sampled for that patient
     * @return the patient identifiers considered incomplete
     */
    private Set<String> filterIncompletePatientIdentifiersFromMap(Map<String, Object> map) {
        Set<String> incompleteIds = new HashSet<String>();
        for (Map.Entry<String, Object> entry : map.entrySet()) {
            String patientId = entry.getKey();
            Object sampled = entry.getValue();
            this._logger.info("Patient " + patientId + " has been sampled for " + sampled);
            if (sampled instanceof String) {
                incompleteIds.add(patientId);
            } else if (sampled instanceof Collection) {
                // Cast to the type that was actually tested: a List value must not cause a ClassCastException.
                Collection<?> bodySites = (Collection<?>) sampled;
                boolean hasVaginalSite = bodySites.contains(HMP16SRNAPatSchlossFullTableAnalyser.VAGINAL_INTROITUS) || bodySites.contains(HMP16SRNAPatSchlossFullTableAnalyser.MID_VAGINA) || bodySites.contains(HMP16SRNAPatSchlossFullTableAnalyser.POSTERIOR_FORNIX);
                // 18 sites are expected when vaginal sites are present, 15 otherwise.
                int requiredSites = hasVaginalSite ? 18 : 15;
                if (bodySites.size() < requiredSites) {
                    incompleteIds.add(patientId);
                }
            }
        }
        Set<String> completeIds = new HashSet<String>(map.keySet());
        completeIds.removeAll(incompleteIds);
        this._logger.info("Patient identifiers for which all body sites have been sampled: " + completeIds.toString());
        this._logger.info("Patient identifiers to filter: " + incompleteIds.toString());
        return incompleteIds;
    }

    /**
     * Drops every row of the output matrix whose name contains {@code UNCLASSIFIED}. The names of
     * the dropped rows are kept in {@code _unclassifiedLines}, and each removal is logged.
     */
    private void filterUnclassifiedTaxa() {
        this._unclassifiedLines = new ArrayList();
        for (String rowName : getOutputMatrix().getRowNames()) {
            if (!rowName.contains(UNCLASSIFIED)) {
                continue;
            }
            this._logger.info("Filtering out taxon " + rowName + ", because it is not classified.");
            this._unclassifiedLines.add(rowName);
        }
        be.ac.vub.bsb.cooccurrence.measures.Matrix withoutUnclassified = MatrixToolsProvider.getSubmatrixWithoutRows(getOutputMatrix(), this._unclassifiedLines);
        setOutputMatrix(withoutUnclassified);
    }

    /**
     * Splits a body-site specific matrix into one sub-matrix per body site and writes each to
     * {@code <folder>/hmp_16S_v<seqWindow><joiner><bodysite>.txt}.
     *
     * <p>The body site of a row is the last joiner-separated token of its row name. Consecutive rows
     * with the same body site form one group, so rows are expected to be ordered by body site.
     *
     * @param matrix matrix whose row names end with the body site
     * @param str    folder the per-body-site files are written to
     */
    private void splitBodySiteSpecificMatrixObject(be.ac.vub.bsb.cooccurrence.measures.Matrix matrix, String str) {
        String currentSite = "";
        ArrayList<String> rowsOfCurrentSite = new ArrayList<String>();
        for (int i = 0; i < matrix.getMatrix().rows(); i++) {
            String[] nameParts = matrix.getRowName(i).split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER);
            String site = nameParts[nameParts.length - 1];
            // A new body site starts: flush the rows collected so far. Only row 0 has no predecessor
            // (i > 0); the previous check "i > 1" missed a site change at row 1 and wrote row 0 into
            // the file of the following body site.
            if (!site.equals(currentSite) && i > 0) {
                writeBodySiteSubMatrix(matrix, rowsOfCurrentSite, currentSite, str);
                rowsOfCurrentSite = new ArrayList<String>();
            }
            rowsOfCurrentSite.add(matrix.getRowName(i));
            currentSite = site;
        }
        // Flush the last group.
        writeBodySiteSubMatrix(matrix, rowsOfCurrentSite, currentSite, str);
    }

    /** Logs and writes the sub-matrix made of the given rows to the file of the given body site. */
    private void writeBodySiteSubMatrix(be.ac.vub.bsb.cooccurrence.measures.Matrix matrix, ArrayList<String> rowNames, String bodySite, String folder) {
        this._logger.info("Saving matrix specific for body site: " + bodySite);
        MatrixToolsProvider.getSubMatrix(matrix, rowNames).writeMatrix(String.valueOf(folder) + PathwayinferenceConstants.PATH_SEPARATOR + "hmp_16S_v" + getSeqWindow() + PathwayinferenceConstants.REACTION_SUBREACTION_JOINER + bodySite + ".txt", "\t", true, true, true);
    }

    /**
     * Runs the complete parsing pipeline and stores the result as the output matrix.
     *
     * <p>Phases, in order:
     * <ol>
     *   <li>load the metadata and, if requested, downsample the count table;</li>
     *   <li>build the taxon count matrix from the selected 16S window(s), or take the WGS matrix if
     *       one was set;</li>
     *   <li>for phylotype data with metadata: reconcile sample identifiers with the metadata lookup and
     *       drop columns without metadata;</li>
     *   <li>filter overlap, optionally filter a site, treat technical replica;</li>
     *   <li>rename columns to {@code <sample id>_<body site>} and, outside body-site specific mode,
     *       attach metadata rows;</li>
     *   <li>in body-site specific mode: optional column removal and normalisation, then conversion to
     *       body-site specific rows (with or without NaN);</li>
     *   <li>final filters: unclassified taxa, minimum column sum, zero lines.</li>
     * </ol>
     */
    public void parse() {
        String colName;
        this._higherLevelTaxaFiltered = false;
        this._timer = new Timer();
        this._timer.startTimer();
        loadMetaData();
        // Merging up to a taxonomic level switches off acceptance of unclassified taxa.
        if (!getMergeTaxonomicLevel().isEmpty()) {
            setAcceptUnclassifiedTaxa(false);
        }
        if (getDownsample() > 0) {
            if (isMerge()) {
                // Only reported; processing continues.
                this._logger.error("Downsampling in combination with merging is not yet implemented!", new IllegalArgumentException());
            }
            HMP16SRNAPatSchlossFullTableAnalyser hMP16SRNAPatSchlossFullTableAnalyser = new HMP16SRNAPatSchlossFullTableAnalyser(HMP16SRNAPatSchlossParsingHelper.getCountTableLocation(getSeqWindow(), isPhylotype(), getDataFolder()));
            hMP16SRNAPatSchlossFullTableAnalyser.downsample(Integer.valueOf(getDownsample()));
            this._logger.info("Downsampled count table to " + getDownsample() + " counts by sample.");
            setNormalizedCountMatrix(hMP16SRNAPatSchlossFullTableAnalyser.getTaxonCountMatrix());
        }
        // Sequence windows to process: all of them when merging, otherwise only the selected one.
        ArrayList arrayList = new ArrayList();
        if (isMerge()) {
            for (Integer num : SEQUENCE_WINDOWS) {
                arrayList.add(num);
            }
        } else {
            arrayList.add(Integer.valueOf(getSeqWindow()));
        }
        HMP16SRNAPatSchlossParsingHelper hMP16SRNAPatSchlossParsingHelper = new HMP16SRNAPatSchlossParsingHelper();
        hMP16SRNAPatSchlossParsingHelper.setDataFolder(getDataFolder());
        int i = 0;
        be.ac.vub.bsb.cooccurrence.measures.Matrix matrix = new be.ac.vub.bsb.cooccurrence.measures.Matrix();
        if (getWgsMatrix().isEmpty()) {
            Iterator it = arrayList.iterator();
            while (it.hasNext()) {
                hMP16SRNAPatSchlossParsingHelper.set16SRegion(((Integer) it.next()).intValue());
                if (this._normalizedCountMatrixSet) {
                    hMP16SRNAPatSchlossParsingHelper.setCountMatrix(getNormalizedCountMatrix());
                }
                hMP16SRNAPatSchlossParsingHelper.setPhylotype(isPhylotype());
                hMP16SRNAPatSchlossParsingHelper.setSequenceIdentityThreshold(getSequenceIdentityThreshold());
                hMP16SRNAPatSchlossParsingHelper.setMergeRepetitiveTaxa(isMergeRepetitiveTaxa());
                hMP16SRNAPatSchlossParsingHelper.setAcceptUnclassifiedTaxa(isAcceptUnclassifiedTaxa());
                hMP16SRNAPatSchlossParsingHelper.setMergeUpToGivenLevel(getMergeTaxonomicLevel());
                hMP16SRNAPatSchlossParsingHelper.processCountMatrix();
                // The first window initialises the matrix; every further window is merged into it.
                matrix = i > 0 ? mergeTwoTaxonCountMatrices(matrix, hMP16SRNAPatSchlossParsingHelper.getOutputMatrix()) : hMP16SRNAPatSchlossParsingHelper.getOutputMatrix();
                i++;
            }
            setOutputMatrix(matrix);
        } else {
            setOutputMatrix(getWgsMatrix());
        }
        if (isPhylotype() && isIncludeMetadata()) {
            // Indices of columns for which no metadata entry can be found.
            ArrayList arrayList2 = new ArrayList();
            this._logger.info("700105298.may1 " + this._sampleVersusMetadataLookup.get("700105298.may1"));
            for (int i2 = 0; i2 < getOutputMatrix().getMatrix().columns(); i2++) {
                String colName2 = getOutputMatrix().getColName(i2);
                if (!this._sampleVersusMetadataLookup.containsKey(colName2)) {
                    // Retry with the part before the first dot plus the suffix ".PPS".
                    String str = String.valueOf(colName2.split("\\.")[0]) + ".PPS";
                    if (this._sampleVersusMetadataLookup.containsKey(str)) {
                        getOutputMatrix().setColName(i2, str);
                        this._logger.info("Sample id " + str + " was changed from dataset may1 to dataset PPS based on absence/presence in metadata file.");
                    } else {
                        this._logger.error("No metadata entry for sample " + str + "!");
                        this._samplesWithMissingMetadata.add(str.split("\\.")[0]);
                        arrayList2.add(Integer.valueOf(i2));
                    }
                }
            }
            this._logger.info("Missing sample metadata for " + this._samplesWithMissingMetadata.size() + " samples.");
            setOutputMatrix(MatrixToolsProvider.getSubMatrixWithoutColIndices(getOutputMatrix(), arrayList2));
        }
        filterOverlap();
        if (getFilterSite() > 0) {
            filterSite();
        }
        treatTechnicalReplica();
        if (isIncludeMetadata() && !isBodysiteSpecTaxa()) {
            initMetadata();
        }
        // Declared as List<String>: the lookup returns List<String>, which cannot be assigned to an
        // ArrayList variable.
        // NOTE(review): for a column without metadata this still holds the previous column's list (or
        // the initial empty list) when it is passed to updateMetadata - confirm this is intended.
        List<String> metadata = new ArrayList<String>();
        for (int i3 = 0; i3 < getOutputMatrix().getMatrix().columns(); i3++) {
            if (this._sampleVersusMetadataLookup.containsKey(getOutputMatrix().getColName(i3))) {
                metadata = this._sampleVersusMetadataLookup.get(getOutputMatrix().getColName(i3));
                // New column name: <sample id><joiner><body site>, with separator characters inside the
                // body site replaced and every dot turned into the joiner.
                colName = (String.valueOf(getOutputMatrix().getColName(i3)) + PathwayinferenceConstants.REACTION_SUBREACTION_JOINER + metadata.get(BODYSITE_INDEX).replace(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR, HelpFormatter.DEFAULT_OPT_PREFIX).replace(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER, HelpFormatter.DEFAULT_OPT_PREFIX)).replace(".", PathwayinferenceConstants.REACTION_SUBREACTION_JOINER);
            } else {
                colName = getOutputMatrix().getColName(i3);
                this._logger.warn("Could not get metadata for sample identifier " + colName + "!");
            }
            getOutputMatrix().setColName(i3, colName);
            if (isIncludeMetadata() && !isBodysiteSpecTaxa()) {
                updateMetadata(metadata, i3);
            }
        }
        if (isIncludeMetadata() && !isBodysiteSpecTaxa()) {
            mergeCountAndMetadataMatrix();
        }
        // Flag columns that result from merged technical replica in their name.
        if (!getReplicaTreatment().equals(NONE)) {
            for (int i4 = 0; i4 < getOutputMatrix().getMatrix().columns(); i4++) {
                if (getOutputMatrix().hasColumnMetaAnnotation(i4, TECHNICAL_REPLICA_MERGED_KEY) && ((Boolean) getOutputMatrix().getColumnMetaAnnotation(i4, TECHNICAL_REPLICA_MERGED_KEY)).booleanValue()) {
                    getOutputMatrix().setColName(i4, String.valueOf(getOutputMatrix().getColName(i4)) + REPLICA_MERGED_FLAG);
                }
            }
        }
        if (isBodysiteSpecTaxa()) {
            if (!getFileWithColumnsToRemove().isEmpty()) {
                MatrixFilterer matrixFilterer = new MatrixFilterer();
                matrixFilterer.setMatrix(getOutputMatrix());
                matrixFilterer.setNamesOfColumnsToDiscard((Set<String>) new OneColumnSetParser(getFileWithColumnsToRemove()).parse());
                matrixFilterer.filter();
                setOutputMatrix(matrixFilterer.getFilteredMatrix());
                this._logger.info(matrixFilterer.toString());
            }
            if (!getFileWithTotalCountsByWhichToDivide().isEmpty()) {
                AbundanceMatrixNormalizer abundanceMatrixNormalizer = new AbundanceMatrixNormalizer();
                abundanceMatrixNormalizer.setAbundanceMatrix(getOutputMatrix());
                abundanceMatrixNormalizer.setExternalData(AbundanceMatrixNormalizer.loadExternalData(getFileWithTotalCountsByWhichToDivide(), "", 0, 1));
                abundanceMatrixNormalizer.setStandardizationMethods(AbundanceMatrixNormalizer.COLUMN_NORMALIZATION_BY_DIVISION_WITH_EXTERNAL_DATA);
                abundanceMatrixNormalizer.normalize();
                setOutputMatrix(abundanceMatrixNormalizer.getNormalizedAbundanceMatrix());
                this._logger.info(abundanceMatrixNormalizer.toString());
            }
        }
        // Patient identifier -> set of body sites sampled for that patient.
        TreeMap treeMap = new TreeMap();
        if (isBodysiteSpecTaxa() && !isBodysiteSpecTaxaAllowingNaN()) {
            for (int i5 = 0; i5 < getOutputMatrix().getMatrix().columns(); i5++) {
                // Column names are <id><joiner><dataset><joiner><body site>; the metadata key is <id>.<dataset>.
                String str2 = String.valueOf(getOutputMatrix().getColName(i5).split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER)[0]) + "." + getOutputMatrix().getColName(i5).split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER)[1];
                String str3 = this._sampleVersusMetadataLookup.get(str2).get(PATIENTID_INDEX);
                // Without replica treatment the visit number is appended to the patient identifier.
                if (getReplicaTreatment().equals(NONE)) {
                    str3 = String.valueOf(str3) + PathwayinferenceConstants.REACTION_SUBREACTION_JOINER + this._sampleVersusMetadataLookup.get(str2).get(VISITNUM_INDEX);
                }
                String str4 = getOutputMatrix().getColName(i5).split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER)[2];
                if (treeMap.containsKey(str3)) {
                    ((TreeSet) treeMap.get(str3)).add(str4);
                } else {
                    TreeSet treeSet = new TreeSet();
                    treeSet.add(str4);
                    treeMap.put(str3, treeSet);
                }
                this._idVersusSampleId.put(str3, str2);
            }
            makeBodysiteSpecificRows(treeMap);
            // Columns containing NaN are dropped in this mode.
            ArrayList arrayList4 = new ArrayList();
            for (int i6 = 0; i6 < getOutputMatrix().getMatrix().columns(); i6++) {
                if (VectorToolsProvider.getNumberOfNaNs(getOutputMatrix().getMatrix().viewColumn(i6)) > 0) {
                    arrayList4.add(Integer.valueOf(i6));
                }
            }
            this._logger.info("Removing " + arrayList4.size() + " patient identifier columns because they have missing body sites.");
            setOutputMatrix(MatrixToolsProvider.getSubMatrixWithoutColIndices(getOutputMatrix(), arrayList4));
            if (isIncludeMetadata()) {
                if (isAddPrivateMetadata()) {
                    initMetadata();
                    for (int i7 = 0; i7 < getOutputMatrix().getMatrix().columns(); i7++) {
                        updateMetadata(this._sampleVersusMetadataLookup.get(this._idVersusSampleId.get(getOutputMatrix().getColName(i7))), i7);
                    }
                    mergeCountAndMetadataMatrix();
                } else {
                    // Without private metadata only a sex row is added: 1 if any vaginal body site was
                    // sampled for the patient, 0 otherwise.
                    be.ac.vub.bsb.cooccurrence.measures.Matrix matrix2 = new be.ac.vub.bsb.cooccurrence.measures.Matrix(1, getOutputMatrix().getMatrix().columns());
                    matrix2.setRowName(0, SEX_KEY);
                    matrix2.setRowMetaAnnotation(0, FeatureMatrixLoader.IS_FEATURE_ATTRIB, FeatureMatrixLoader.FEATURE_PRESENT_VALUE);
                    for (int i8 = 0; i8 < getOutputMatrix().getMatrix().columns(); i8++) {
                        TreeSet treeSet2 = (TreeSet) treeMap.get(getOutputMatrix().getColName(i8));
                        if (treeSet2.contains(HMP16SRNAPatSchlossFullTableAnalyser.MID_VAGINA) || treeSet2.contains(HMP16SRNAPatSchlossFullTableAnalyser.VAGINAL_INTROITUS) || treeSet2.contains(HMP16SRNAPatSchlossFullTableAnalyser.POSTERIOR_FORNIX)) {
                            matrix2.getMatrix().set(0, i8, 1.0d);
                        } else {
                            matrix2.getMatrix().set(0, i8, 0.0d);
                        }
                    }
                    setOutputMatrix(MatrixToolsProvider.mergeMatricesRowWise(getOutputMatrix(), matrix2));
                }
            }
            // Prefix every column name with "id_".
            String[] strArr = new String[getOutputMatrix().getColNames().length];
            for (int i9 = 0; i9 < getOutputMatrix().getColNames().length; i9++) {
                strArr[i9] = "id_" + getOutputMatrix().getColName(i9);
            }
            getOutputMatrix().setColNames(strArr);
        } else if (isBodysiteSpecTaxa() && isBodysiteSpecTaxaAllowingNaN()) {
            makeBodysiteSpecificRowsAllowingNaNs();
            if (isNaContainingBodysiteSpecWithCrossLinks()) {
                mergeColumnsPatientWiseInBodysiteSpecificMatrixWithNaNs();
            }
            if (isIncludeMetadata()) {
                initMetadata();
                for (int i10 = 0; i10 < getOutputMatrix().getMatrix().columns(); i10++) {
                    // With cross links the column name is "id_" + patient identifier; otherwise the
                    // metadata key is rebuilt from the first two tokens of the column name.
                    updateMetadata(this._sampleVersusMetadataLookup.get(isNaContainingBodysiteSpecWithCrossLinks() ? this._idVersusSampleId.get(getOutputMatrix().getColName(i10).replace("id_", "")) : String.valueOf(getOutputMatrix().getColName(i10).split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER)[0]) + "." + getOutputMatrix().getColName(i10).split(PathwayinferenceConstants.REACTION_SUBREACTION_JOINER)[1]), i10);
                }
                mergeCountAndMetadataMatrix();
            }
        }
        if (!isAcceptUnclassifiedTaxa()) {
            filterUnclassifiedTaxa();
        }
        if (getMinCount() > 0.0d) {
            // Drop columns whose sum is below the minimum count.
            HashSet hashSet = new HashSet();
            for (int i11 = 0; i11 < getOutputMatrix().getColNames().length; i11++) {
                double sum = StatsProvider.getSum(getOutputMatrix().getMatrix().viewColumn(i11), true);
                if (sum < getMinCount()) {
                    hashSet.add(Integer.valueOf(i11));
                    System.out.println("Removing low total read count (" + sum + ") column " + getOutputMatrix().getColName(i11));
                }
            }
            setOutputMatrix(MatrixToolsProvider.getSubMatrixWithoutColIndices(getOutputMatrix(), hashSet));
        }
        if (isFilterZeroLines()) {
            filterZeroLines();
        }
        if (test) {
            // Diagnostic output for one hard-coded OTU and three hard-coded samples.
            int indexOfRowName = getOutputMatrix().getIndexOfRowName(testOTU);
            System.out.println(getOutputMatrix().getMatrix().viewRow(indexOfRowName));
            int indexOfColName = getOutputMatrix().getIndexOfColName("700013596_may1_Stool");
            int indexOfColName2 = getOutputMatrix().getIndexOfColName("700014390_PPS_Stool");
            int indexOfColName3 = getOutputMatrix().getIndexOfColName("700111286_may1_Posterior-fornix");
            System.out.println("Count for " + testOTU + " in 700013596_may1_Stool: " + getOutputMatrix().getMatrix().get(indexOfRowName, indexOfColName));
            System.out.println("Count for " + testOTU + " in 700014390_PPS_Stool: " + getOutputMatrix().getMatrix().get(indexOfRowName, indexOfColName2));
            System.out.println("Count for " + testOTU + " in 700111286_may1_Posterior-fornix: " + getOutputMatrix().getMatrix().get(indexOfRowName, indexOfColName3));
        }
        this._logger.info("Parsing done.");
    }

    /**
     * Reads a two-column file (column 0 = key, column 1 = value) and passes the resulting map to
     * {@code filterIncompletePatientIdentifiersFromMap}, which logs the complete and incomplete
     * patient identifiers. The returned set of incomplete identifiers is not used here.
     *
     * @param str location of the two-column file
     */
    public void filterIncompletePatientIdentifiersFromFile(String str) {
        TwoColumnHashMapParser twoColumnHashMapParser = new TwoColumnHashMapParser(str);
        twoColumnHashMapParser.setKeyColumn(0);
        twoColumnHashMapParser.setValueColumn(1);
        filterIncompletePatientIdentifiersFromMap(twoColumnHashMapParser.parse());
    }

    /**
     * Loads a matrix from a file and splits it into per-body-site files via
     * {@code splitBodySiteSpecificMatrixObject}.
     *
     * @param str  location of the matrix file to read
     * @param str2 folder that receives the per-body-site files
     */
    public void splitBodySiteSpecificMatrix(String str, String str2) {
        be.ac.vub.bsb.cooccurrence.measures.Matrix loaded = new be.ac.vub.bsb.cooccurrence.measures.Matrix();
        loaded.readMatrix(str, false);
        this.splitBodySiteSpecificMatrixObject(loaded, str2);
    }

    /**
     * Filters the taxa of the output matrix using the given argument and records that higher-level
     * taxa have been filtered. When metadata is included, the names of all rows annotated as
     * features are collected first and handed to the filter.
     *
     * @param str argument passed unchanged to the {@code MatrixToolsProvider} taxon filter
     */
    public void filterTaxa(String str) {
        HashSet<String> featureRowNames = new HashSet<String>();
        if (isIncludeMetadata()) {
            int rowCount = getOutputMatrix().getMatrix().rows();
            for (int row = 0; row < rowCount; row++) {
                if (!getOutputMatrix().hasRowMetaAnnotation(row, FeatureMatrixLoader.IS_FEATURE_ATTRIB)) {
                    continue;
                }
                if (getOutputMatrix().getRowMetaAnnotation(row, FeatureMatrixLoader.IS_FEATURE_ATTRIB).equals(FeatureMatrixLoader.FEATURE_PRESENT_VALUE)) {
                    featureRowNames.add(getOutputMatrix().getRowName(row));
                }
            }
        }
        be.ac.vub.bsb.cooccurrence.measures.Matrix filtered;
        if (isBodysiteSpecTaxa()) {
            filtered = MatrixToolsProvider.filterBodysiteSpecTaxaInMatrix(getOutputMatrix(), str, featureRowNames);
        } else {
            filtered = MatrixToolsProvider.filterTaxaInMatrix(getOutputMatrix(), str, featureRowNames);
        }
        setOutputMatrix(filtered);
        this._higherLevelTaxaFiltered = true;
    }

    /**
     * Sets the merge flag. When true, {@code parse()} processes every entry of
     * {@code SEQUENCE_WINDOWS} and merges the resulting count matrices.
     */
    public void setMerge(boolean z) {
        this._merge = z;
    }

    /** Returns the merge flag. */
    public boolean isMerge() {
        return this._merge;
    }

    /** Sets whether metadata rows are added to the output matrix. */
    public void setIncludeMetadata(boolean z) {
        this._includeMetadata = z;
    }

    /** Returns whether metadata rows are added to the output matrix. */
    public boolean isIncludeMetadata() {
        return this._includeMetadata;
    }

    /** Sets the sequence window (reported by {@code toString()} as the 16S variable region). */
    public void setSeqWindow(int i) {
        this._seqWindow = i;
    }

    /** Returns the sequence window (reported by {@code toString()} as the 16S variable region). */
    public int getSeqWindow() {
        return this._seqWindow;
    }

    /**
     * Sets the treatment of technical replica. The value is compared against {@code NONE}
     * elsewhere in this class.
     */
    public void setReplicaTreatment(String str) {
        this._replicaTreatment = str;
    }

    /** Returns the treatment of technical replica. */
    public String getReplicaTreatment() {
        return this._replicaTreatment;
    }

    /** Sets whether phylotype counts are parsed instead of OTU counts. */
    public void setPhylotype(boolean z) {
        this._phylotype = z;
    }

    /** Returns whether phylotype counts are parsed instead of OTU counts. */
    public boolean isPhylotype() {
        return this._phylotype;
    }

    /** Sets the sequence identity threshold that {@code parse()} hands to the parsing helper. */
    public void setSequenceIdentityThreshold(double d) {
        this._sequenceIdentityThreshold = d;
    }

    /** Returns the sequence identity threshold. */
    public double getSequenceIdentityThreshold() {
        return this._sequenceIdentityThreshold;
    }

    /**
     * Sets the merge-repetitive-taxa flag (reported by {@code toString()} as summing the counts
     * of OTUs assigned to the same taxon).
     */
    public void setMergeRepetitiveTaxa(boolean z) {
        this._mergeRepetitiveTaxa = z;
    }

    /** Returns the merge-repetitive-taxa flag. */
    public boolean isMergeRepetitiveTaxa() {
        return this._mergeRepetitiveTaxa;
    }

    /**
     * Sets whether the output matrix uses body-site specific taxa (reported by
     * {@code toString()} as rows being taxon-location units).
     */
    public void setBodysiteSpecTaxa(boolean z) {
        this._bodysiteSpecTaxa = z;
    }

    /** Returns whether the output matrix uses body-site specific taxa. */
    public boolean isBodysiteSpecTaxa() {
        return this._bodysiteSpecTaxa;
    }

    /** Sets whether the body-site specific matrix may contain NaN values. */
    public void setBodysiteSpecTaxaAllowingNaN(boolean z) {
        this._bodysiteSpecTaxaAllowingNaN = z;
    }

    /** Returns whether the body-site specific matrix may contain NaN values. */
    public boolean isBodysiteSpecTaxaAllowingNaN() {
        return this._bodysiteSpecTaxaAllowingNaN;
    }

    /**
     * Sets whether the NaN-containing body-site specific matrix is additionally merged
     * patient-wise (reported by {@code toString()} as conversion into a cross-bodysite matrix).
     */
    public void setNaContainingBodysiteSpecWithCrossLinks(boolean z) {
        this._naContainingBodysiteSpecWithCrossLinks = z;
    }

    /** Returns whether the NaN-containing body-site specific matrix is merged patient-wise. */
    public boolean isNaContainingBodysiteSpecWithCrossLinks() {
        return this._naContainingBodysiteSpecWithCrossLinks;
    }

    /**
     * Sets the site filter code. As reported by {@code toString()}: 0 = no removal,
     * 1 = remove {@code HOUSTON}, 2 = remove {@code ST_LOUIS}.
     */
    public void setFilterSite(int i) {
        this._filterSite = i;
    }

    /** Returns the site filter code (0 = no removal, 1 = {@code HOUSTON}, 2 = {@code ST_LOUIS}). */
    public int getFilterSite() {
        return this._filterSite;
    }

    /**
     * Sets the count number to downsample to. {@code parse()} only downsamples when the value
     * is greater than zero.
     */
    public void setDownsample(int i) {
        this._downsample = i;
    }

    /** Returns the count number to downsample to. */
    public int getDownsample() {
        return this._downsample;
    }

    /** Returns the minimum column sum below which {@code parse()} removes a column. */
    public double getMinCount() {
        return this._minCount;
    }

    /**
     * Sets the minimum column sum. {@code parse()} removes columns with a smaller sum, but only
     * when this value is greater than zero.
     */
    public void setMinCount(double d) {
        this._minCount = d;
    }

    /** Returns whether {@code parse()} calls {@code filterZeroLines()} at the end. */
    public boolean isFilterZeroLines() {
        return this._filterZeroLines;
    }

    /** Sets whether {@code parse()} calls {@code filterZeroLines()} at the end. */
    public void setFilterZeroLines(boolean z) {
        this._filterZeroLines = z;
    }

    /** Sets the flag requesting removal of suspicious samples. */
    public void setRemoveSuspiciousSamples(boolean z) {
        this._removeSuspiciousSamples = z;
    }

    /** Returns the flag requesting removal of suspicious samples. */
    public boolean isRemoveSuspiciousSamples() {
        return this._removeSuspiciousSamples;
    }

    /** Sets the data folder that {@code parse()} hands to the count table helpers. */
    public void setDataFolder(String str) {
        this._dataFolder = str;
    }

    /** Returns the data folder. */
    public String getDataFolder() {
        return this._dataFolder;
    }

    /** Replaces the output matrix; the processing steps of this class read and overwrite it. */
    public void setOutputMatrix(be.ac.vub.bsb.cooccurrence.measures.Matrix matrix) {
        this._outputMatrix = matrix;
    }

    /** Returns the current output matrix (the stored instance, not a copy). */
    public be.ac.vub.bsb.cooccurrence.measures.Matrix getOutputMatrix() {
        return this._outputMatrix;
    }

    /**
     * Sets whether unclassified taxa are kept. {@code parse()} forces this to false when a merge
     * taxonomic level is set.
     */
    public void setAcceptUnclassifiedTaxa(boolean z) {
        this._acceptUnclassifiedTaxa = z;
    }

    /** Returns whether unclassified taxa are kept. */
    public boolean isAcceptUnclassifiedTaxa() {
        return this._acceptUnclassifiedTaxa;
    }

    /** Sets whether private metadata is added to the output matrix. */
    public void setAddPrivateMetadata(boolean z) {
        this._addPrivateMetadata = z;
    }

    /** Returns whether private metadata is added to the output matrix. */
    public boolean isAddPrivateMetadata() {
        return this._addPrivateMetadata;
    }

    /**
     * Sets the file listing names of columns to discard. {@code parse()} uses it only in
     * body-site specific mode and only when the value is non-empty.
     */
    public void setFileWithColumnsToRemove(String str) {
        this._fileWithColumnsToRemove = str;
    }

    /** Returns the file listing names of columns to discard. */
    public String getFileWithColumnsToRemove() {
        return this._fileWithColumnsToRemove;
    }

    /**
     * Sets the file with external total counts used for column normalisation by division.
     * {@code parse()} uses it only in body-site specific mode and only when the value is non-empty.
     */
    public void setFileWithTotalCountsByWhichToDivide(String str) {
        this._fileWithTotalCountsByWhichToDivide = str;
    }

    /** Returns the file with external total counts used for column normalisation. */
    public String getFileWithTotalCountsByWhichToDivide() {
        return this._fileWithTotalCountsByWhichToDivide;
    }

    /**
     * Stores a count matrix and marks it as set, so that {@code parse()} hands it to the parsing
     * helper as its count matrix.
     */
    public void setNormalizedCountMatrix(be.ac.vub.bsb.cooccurrence.measures.Matrix matrix) {
        this._normalizedCountMatrix = matrix;
        // parse() checks this flag rather than testing the matrix for null.
        this._normalizedCountMatrixSet = true;
    }

    /** Returns the count matrix stored via {@code setNormalizedCountMatrix}. */
    public be.ac.vub.bsb.cooccurrence.measures.Matrix getNormalizedCountMatrix() {
        return this._normalizedCountMatrix;
    }

    /**
     * Returns the taxonomic level up to which taxa are merged. {@code parse()} and
     * {@code toString()} call {@code isEmpty()} on the result, so it is expected to be non-null.
     */
    public String getMergeTaxonomicLevel() {
        return this._mergeTaxonomicLevel;
    }

    /**
     * Sets the taxonomic level up to which taxa are merged. A non-empty value makes
     * {@code parse()} reject unclassified taxa.
     */
    public void setMergeTaxonomicLevel(String str) {
        this._mergeTaxonomicLevel = str;
    }

    /**
     * Sets the WGS matrix. If it is not empty, {@code parse()} uses it as the output matrix
     * instead of processing the 16S count tables.
     */
    public void setWgsMatrix(be.ac.vub.bsb.cooccurrence.measures.Matrix matrix) {
        this._wgsMatrix = matrix;
    }

    /**
     * Returns the WGS matrix. {@code parse()} calls {@code isEmpty()} on the result, so it is
     * expected to be non-null.
     */
    public be.ac.vub.bsb.cooccurrence.measures.Matrix getWgsMatrix() {
        return this._wgsMatrix;
    }

    /**
     * Returns the lookup from sample identifier to that sample's metadata values. The internal
     * map itself is returned, not a copy, so changes made by the caller affect this parser.
     */
    public Map<String, List<String>> getSampleVersusMetadataLookup() {
        return this._sampleVersusMetadataLookup;
    }

    /**
     * Builds the human-readable run summary of this parser: a header, the
     * PARAMETER section (configuration), the REPORT section (figures about the
     * output matrix) and the REMARKS section. Every line starts with "# " and
     * ends with {@link AbstractFormatter#DEFAULT_ROW_SEPARATOR}.
     *
     * <p>Side effect: the internal timer is stopped on every call before its
     * runtime is read.
     *
     * <p>Optional String settings (merge level, columns-to-remove file,
     * total-counts file) that are {@code null} are treated like the empty
     * string, i.e. "not set", instead of raising a
     * {@link NullPointerException}.
     *
     * <p>NOTE(review): the REPORT section dereferences {@code getOutputMatrix()},
     * so this presumably must only be called once an output matrix is
     * available - confirm against {@code parse()}.
     *
     * @return the multi-line summary text
     */
    @Override
    public String toString() {
        this._timer.stopTimer();
        final String eol = AbstractFormatter.DEFAULT_ROW_SEPARATOR;
        final StringBuilder report = new StringBuilder();

        // Header
        report.append("# HMP 16S Data Parser").append(eol);
        report.append("# Date=").append(new Date().toString()).append(eol);
        report.append("# Runtime in ms=").append(this._timer.getRuntime().toString()).append(eol);

        // Configuration
        report.append("# PARAMETER").append(eol);
        report.append("# 16S variable region=").append(getSeqWindow()).append(eol);
        report.append("# OTU counts parsed=").append(!isPhylotype()).append(eol);
        report.append("# Phylotype counts parsed=").append(isPhylotype()).append(eol);
        // Either the downsampling target or the normalized-matrix flag is reported, never both.
        if (getDownsample() > 0) {
            report.append("# Downsampled count number=").append(getDownsample()).append(eol);
        } else {
            report.append("# Normalized input matrix set=").append(this._normalizedCountMatrixSet).append(eol);
        }
        final String mergeLevel = getMergeTaxonomicLevel();
        final boolean mergeLevelGiven = mergeLevel != null && !mergeLevel.isEmpty();
        if (!mergeLevelGiven) {
            report.append("# Counts of OTUs assigned to same taxon summed=").append(isMergeRepetitiveTaxa()).append(eol);
        }
        report.append("# Unclassified taxa accepted=").append(isAcceptUnclassifiedTaxa()).append(eol);
        if (mergeLevelGiven) {
            report.append("# Taxa merged up to given taxonomic level=").append(mergeLevel).append(eol);
        }
        report.append("# Site removed (").append(HOUSTON).append("=1, ").append(ST_LOUIS)
                .append("=2, no removal=0)=").append(getFilterSite()).append(eol);
        report.append("# Suspicious samples (see Dirk's report) removed=").append(isRemoveSuspiciousSamples()).append(eol);
        report.append("# Technical replica treatment (technical replica share the same patient identifier and body site)=")
                .append(getReplicaTreatment()).append(eol);
        report.append("# Sequence identity threshold=").append(getSequenceIdentityThreshold()).append(eol);
        report.append("# Rows are taxon-location units (cross-bodysite matrix)=").append(isBodysiteSpecTaxa()).append(eol);
        report.append("# Bodysite-specific matrix is allowed to contain NaN=").append(isBodysiteSpecTaxaAllowingNaN()).append(eol);
        report.append("# Bodysite-specific matrix with NaNs is converted into a cross-bodysite matrix=")
                .append(isNaContainingBodysiteSpecWithCrossLinks()).append(eol);
        report.append("# Features (metadata) included in output matrix=").append(isIncludeMetadata()).append(eol);
        report.append("# Private metadata included in output matrix=").append(isAddPrivateMetadata()).append(eol);
        report.append("# Zero lines filtered=").append(isFilterZeroLines()).append(eol);
        report.append("# Higher-level taxa filtered=").append(this._higherLevelTaxaFiltered).append(eol);
        if (getMinCount() > 0.0d) {
            report.append("# Columns were filtered that had a total read count below=").append(getMinCount()).append(eol);
        }
        final String columnsToRemoveFile = getFileWithColumnsToRemove();
        if (columnsToRemoveFile != null && !columnsToRemoveFile.isEmpty()) {
            report.append("# Columns removed given in file=").append(columnsToRemoveFile).append(eol);
        }
        final String totalCountsFile = getFileWithTotalCountsByWhichToDivide();
        if (totalCountsFile != null && !totalCountsFile.isEmpty()) {
            report.append("# Column entries divided by total counts given in file=").append(totalCountsFile).append(eol);
        }

        // Figures about the produced matrix
        report.append("# REPORT").append(eol);
        report.append("# Number of rows (taxa and, if included, features) in output matrix=")
                .append(getOutputMatrix().getMatrix().rows()).append(eol);
        report.append("# Number of columns (samples) in output matrix=")
                .append(getOutputMatrix().getMatrix().columns()).append(eol);
        report.append("# Number of removed overlapping samples (marked with ").append(NA).append(" in ")
                .append(METADATA_FILE).append(")=").append(this._numOverlappingSamples).append(eol);
        if (isFilterZeroLines()) {
            report.append("# Removed zero-occurrence taxa (and features, if included)=")
                    .append(this._zeroLines.toString()).append(eol);
        }
        if (isIncludeMetadata()) {
            report.append("# Number of metadata parsed=").append(this._metadataTempMatrix.keySet().size()).append(eol);
            if (!this._samplesWithMissingMetadata.isEmpty()) {
                report.append("# Number of samples absent in the metadata file=")
                        .append(this._samplesWithMissingMetadata.size()).append(eol);
                report.append("# Samples absent in the metadata file=")
                        .append(this._samplesWithMissingMetadata.toString()).append(eol);
            }
        }
        report.append("# Number of unclassified OTUs removed=").append(this._unclassifiedLines.size()).append(eol);

        // Remarks
        report.append("# REMARKS").append(eol);
        report.append("# Special samples ").append(ArrayTools.stringArrayToString(SPECIAL_SAMPLES, ", "))
                .append(" removed.").append(eol);
        if (getFilterSite() > 0) {
            report.append("# Samples without Site information removed.").append(eol);
        }
        return report.toString();
    }

    /**
     * Ad-hoc driver: configures a parser with fixed settings, parses the data
     * folder, writes the output matrix as a tab-separated file and saves the
     * parser's configuration report next to it.
     *
     * <p>Input and output locations are hard-coded absolute paths of a
     * developer machine; the command-line arguments are not read.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        final String outputDir = "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Collaborations/Chris-Quince/Hackathon/HMP-analysis/Data/mothur-hq-OTU-V35";
        System.out.println("Saving to: " + outputDir);

        HMP16SRNAPatSchlossParser parser = new HMP16SRNAPatSchlossParser();
        parser.setDataFolder("/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/HMP/HMP_full/HMP_public_data/16S/PatSchloss3.0/");
        parser.setSeqWindow(35);
        parser.setMinCount(1000.0d);
        parser.setSequenceIdentityThreshold(100.0d);
        parser.setFilterSite(2);
        parser.setRemoveSuspiciousSamples(true);
        parser.setReplicaTreatment(NONE);
        parser.setMerge(false);
        parser.setMergeRepetitiveTaxa(false);
        parser.setMergeTaxonomicLevel(TaxonomyProvider.GENUS);
        parser.setAcceptUnclassifiedTaxa(true);
        parser.setPhylotype(false);
        parser.setIncludeMetadata(false);
        parser.setAddPrivateMetadata(true);
        parser.setBodysiteSpecTaxa(false);
        parser.setBodysiteSpecTaxaAllowingNaN(false);
        parser.setNaContainingBodysiteSpecWithCrossLinks(false);
        parser.setFilterZeroLines(true);
        parser.parse();

        // File-name suffix derived from the site filter: 1 -> "_stlouis", 2 -> "_houston", otherwise none.
        String siteSuffix = "";
        if (parser.getFilterSite() == 1) {
            siteSuffix = "_stlouis";
        }
        if (parser.getFilterSite() == 2) {
            siteSuffix = "_houston";
        }

        parser.getOutputMatrix().writeMatrix(
                outputDir + "/" + ("hmp_hq_v" + parser.getSeqWindow() + "_otus" + siteSuffix + ".txt"),
                "\t",
                true,
                true);

        // The report is generated once for the console and once more for the file.
        System.out.println(parser.toString());
        IOTools.exportStringToFile(
                parser.toString(),
                outputDir + "/HMP_16SParser_configuration" + siteSuffix + ".txt");
    }
}
