package be.ac.vub.bsb.parsers.chaffron;

import be.ac.ulb.bigre.pathwayinference.core.io.TwoColumnHashMapParser;
import be.ac.vub.bsb.cooccurrence.cmd.CooccurrenceAnalyser;
import be.ac.vub.bsb.cooccurrence.conversion.MatrixFilterer;
import be.ac.vub.bsb.cooccurrence.measures.Matrix;
import be.ac.vub.bsb.cooccurrence.util.ArrayTools;
import be.ac.vub.bsb.parsers.util.ParserTools;
import cern.colt.matrix.impl.DenseDoubleMatrix1D;
import com.sun.org.apache.xerces.internal.impl.xs.SchemaSymbols;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.commons.math3.geometry.VectorFormat;
import org.apache.log4j.Logger;

/* JADX WARN: Classes with same name are omitted:
  input_file:be/ac/vub/bsb/parsers/chaffron/PresenceAbsenceMatrixGenerator.class
 */
/* loaded from: input_file:lib/be_ac_vub_bsb_parsers.jar:be/ac/vub/bsb/parsers/chaffron/PresenceAbsenceMatrixGenerator.class */
/**
 * Builds a presence/absence (1.0/0.0) matrix with OTUs as rows and samples as columns from a
 * tab-separated input file, and optionally a second matrix with environmental keywords as rows
 * and the same samples as columns.
 *
 * <p>The input file is located as
 * {@code <folder>/<FILE_NAME_ROOT><clustering distance><FILE_SUFFIX>}. Typical use: configure the
 * instance through the setters, call {@link #parse()}, then read {@link #getMatrix()} and
 * {@link #getFeatureMatrix()}.
 */
public class PresenceAbsenceMatrixGenerator {
    /**
     * Supported OTU clustering distance codes; the selected code is part of the input file name.
     * The literal "1" replaces a reference to a JDK-internal Xerces constant with the same value.
     */
    public static String[] CLUSTERING_DISTANCES = {"01", "02", "03", "05", "1", "2", "15"};
    /** Clustering distance code used when none is set explicitly. */
    public static String DEFAULT_DISTANCE = "05";
    /** Input file name up to (not including) the clustering distance code. */
    public static String FILE_NAME_ROOT = "gg_sample_details_otus_filtered_file.d.";
    /** Input file name extension. */
    public static String FILE_SUFFIX = ".tsv";

    /** Zero-based index of the input column that is used as the sample identifier. */
    private static final int SAMPLE_COLUMN = 4;
    /** Zero-based index of the input column that holds the lineage. */
    private static final int LINEAGE_COLUMN = 6;

    /** Filterer used by the most recent filtering step; {@code null} until a filter was applied. */
    private MatrixFilterer _filterer;
    private String _envPropFileLocation = "";
    private boolean _filterFeatureMatrix = false;
    private String _folder = "";
    private String _otuClusteringDistance = DEFAULT_DISTANCE;
    private Integer _otuMinOccurrenceFilter = 0;
    private Integer _sampleMinOccurrenceFilter = 0;
    /** Sample identifier to environmental keyword, as read from the environment property file. */
    private Map<String, Object> _samplesVsEnvKeyword = new HashMap<>();
    /** Lower-cased environmental keywords that do not contain a '[' character. */
    private Set<String> _envProps = new HashSet<>();
    /** All sample identifiers seen in the input, sorted. */
    private Set<String> _samples = new TreeSet<>();
    /** OTU name to the set of samples in which it occurs, sorted by OTU name. */
    private Map<String, Set<String>> _otuVersusSamples = new TreeMap<>();
    /** OTU name to the first lineage string seen for it. */
    private Map<String, Object> _otuVersusLineage = new TreeMap<>();
    private Logger _logger = Logger.getLogger(getClass().getPackage().toString());
    /** Number of 1.0 entries written into the presence/absence matrix before filtering. */
    private Integer _numberOfNonZeros = 0;
    private boolean _matrixBuilt = false;
    private Matrix _matrix = new Matrix();
    private Matrix _featureMatrix = new Matrix();

    /** Creates a generator with an empty folder and the default clustering distance. */
    public PresenceAbsenceMatrixGenerator() {
    }

    /**
     * Creates a generator for the given input folder and clustering distance.
     *
     * @param str  folder that contains the input file
     * @param str2 clustering distance code, expected to be one of {@link #CLUSTERING_DISTANCES}
     */
    public PresenceAbsenceMatrixGenerator(String str, String str2) {
        setFolder(str);
        setOtuClusteringDistance(str2);
    }

    /**
     * Reads the environment property file (key column 3, value column 2) and collects the
     * lower-cased keywords that do not contain a '[' character.
     */
    private void loadEnvPropsFile() {
        TwoColumnHashMapParser envParser = new TwoColumnHashMapParser(getEnvPropFileLocation());
        envParser.setKeyColumn(3);
        envParser.setValueColumn(2);
        this._samplesVsEnvKeyword = envParser.parse();
        for (Object keyword : this._samplesVsEnvKeyword.values()) {
            String keywordText = keyword.toString();
            if (!keywordText.contains("[")) {
                this._envProps.add(keywordText.toLowerCase());
            }
        }
    }

    /**
     * Applies the configured minimum-occurrence filters to a matrix.
     *
     * @param matrix matrix to filter
     * @param z      {@code true} to skip the sample (column) filter; used for the feature matrix
     * @return the filtered matrix, or {@code matrix} itself when both thresholds are zero or less
     */
    private Matrix matrixFilter(Matrix matrix, boolean z) {
        int otuMinimum = getOtuMinOccurrenceFilter().intValue();
        int sampleMinimum = getSampleMinOccurrenceFilter().intValue();
        if (otuMinimum <= 0 && sampleMinimum <= 0) {
            return matrix;
        }
        String methods = "";
        String numbers = "";
        if (otuMinimum > 0) {
            methods = MatrixFilterer.ROW_MIN_OCCURRENCE;
            numbers = getOtuMinOccurrenceFilter().toString();
        }
        if (sampleMinimum > 0 && !z) {
            if (methods.isEmpty()) {
                methods = MatrixFilterer.COLUMN_MIN_OCCURRENCE;
                numbers = getSampleMinOccurrenceFilter().toString();
            } else {
                methods = methods + CooccurrenceAnalyser.ITEM_SEPARATOR + MatrixFilterer.COLUMN_MIN_OCCURRENCE;
                numbers = numbers + CooccurrenceAnalyser.ITEM_SEPARATOR + getSampleMinOccurrenceFilter().toString();
            }
        }
        // NOTE(review): when only the sample filter is set and z is true, the filterer is run
        // with empty method/number strings, as before - confirm MatrixFilterer accepts that.
        this._filterer = new MatrixFilterer(matrix);
        this._filterer.setFilterMethods(methods);
        this._filterer.setFilterNumbers(numbers);
        this._filterer.filter();
        return this._filterer.getFilteredMatrix();
    }

    /**
     * Reads the input file, builds and filters the presence/absence matrix and, when an
     * environment property file is configured, builds (and optionally filters) the feature matrix.
     *
     * <p>Terminates the JVM with exit code -1 when the clustering distance is not one of
     * {@link #CLUSTERING_DISTANCES}. An I/O error while reading the input file is logged and the
     * matrix is built from whatever was read up to that point.
     */
    public void parse() {
        ParserTools.checkFolder(getFolder());
        if (!ArrayTools.arrayToSet((Object[]) CLUSTERING_DISTANCES).contains(getOtuClusteringDistance())) {
            this._logger.fatal("The selected OTU cluster distance is not supported!");
            System.exit(-1);
        }
        boolean withEnvProps = !getEnvPropFileLocation().isEmpty();
        if (withEnvProps) {
            loadEnvPropsFile();
        }
        readOtuFile(getFolder() + File.separator + FILE_NAME_ROOT + getOtuClusteringDistance() + FILE_SUFFIX);
        buildPresenceAbsenceMatrix();
        this._logger.info("Filtering out all OTUs with less than " + getOtuMinOccurrenceFilter() + " occurrences across samples...");
        setMatrix(matrixFilter(getMatrix(), false));
        if (withEnvProps) {
            buildFeatureMatrix();
            if (isFilterFeatureMatrix()) {
                setFeatureMatrix(matrixFilter(getFeatureMatrix(), true));
            }
        }
        this._matrixBuilt = true;
    }

    /** Reads the tab-separated input file and records samples, OTU occurrences and lineages. */
    private void readOtuFile(String path) {
        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            int lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                String[] columns = line.split("\t");
                if (columns.length <= LINEAGE_COLUMN) {
                    // Blank or truncated lines used to abort parsing with an index error.
                    this._logger.warn("Skipping line " + lineNumber + " of " + path + ": expected at least "
                            + (LINEAGE_COLUMN + 1) + " columns but found " + columns.length);
                    continue;
                }
                String sample = columns[SAMPLE_COLUMN];
                String lineage = columns[LINEAGE_COLUMN]
                        .replace(VectorFormat.DEFAULT_SEPARATOR, CooccurrenceAnalyser.LINEAGE_SEPARATOR)
                        .replace("\"", "");
                String[] lineageLevels = lineage.split(CooccurrenceAnalyser.LINEAGE_SEPARATOR);
                if (lineageLevels.length == 0) {
                    this._logger.warn("Skipping line " + lineNumber + " of " + path + ": empty lineage");
                    continue;
                }
                // The last lineage level serves as the OTU name.
                String otu = lineageLevels[lineageLevels.length - 1];
                this._samples.add(sample);
                Set<String> samplesOfOtu = this._otuVersusSamples.get(otu);
                if (samplesOfOtu == null) {
                    samplesOfOtu = new HashSet<>();
                    this._otuVersusSamples.put(otu, samplesOfOtu);
                }
                samplesOfOtu.add(sample);
                if (!this._otuVersusLineage.containsKey(otu)) {
                    this._otuVersusLineage.put(otu, lineage);
                }
            }
        } catch (IOException e) {
            // Covers FileNotFoundException as well; processing continues with the data read so far.
            this._logger.error("Could not read OTU file " + path, e);
        }
    }

    /** Creates the unfiltered OTU x sample matrix from the collected occurrences. */
    private void buildPresenceAbsenceMatrix() {
        int otuCount = this._otuVersusSamples.size();
        int sampleCount = this._samples.size();
        this._logger.info("Number of OTUs=" + otuCount);
        this._logger.info("Number of samples=" + sampleCount);
        setMatrix(new Matrix(otuCount, sampleCount));
        // The count is recomputed for the whole matrix, so a repeated parse() must start at zero.
        this._numberOfNonZeros = 0;
        if (otuCount > 0) {
            int columnIndex = 0;
            for (String sample : this._samples) {
                getMatrix().setColName(columnIndex, sample);
                columnIndex++;
            }
        }
        int rowIndex = 0;
        for (Map.Entry<String, Set<String>> otuEntry : this._otuVersusSamples.entrySet()) {
            String otu = otuEntry.getKey();
            this._logger.info("Processing otu " + otu);
            Set<String> samplesOfOtu = otuEntry.getValue();
            double[] row = new double[sampleCount];
            int columnIndex = 0;
            for (String sample : this._samples) {
                if (samplesOfOtu.contains(sample)) {
                    row[columnIndex] = 1.0d;
                    this._numberOfNonZeros = Integer.valueOf(this._numberOfNonZeros.intValue() + 1);
                }
                columnIndex++;
            }
            getMatrix().setRow(rowIndex, row);
            getMatrix().setRowName(rowIndex, otu);
            rowIndex++;
        }
    }

    /**
     * Creates the keyword x sample feature matrix for the columns of the current (filtered)
     * presence/absence matrix: 1.0 where the sample carries the keyword, 0.0 where it carries a
     * different one, NaN where the sample has no entry in the environment property file.
     */
    private void buildFeatureMatrix() {
        Map<String, List<Double>> valuesByKeyword = new TreeMap<>();
        for (String keyword : this._envProps) {
            valuesByKeyword.put(keyword, new ArrayList<Double>());
        }
        for (String sample : getMatrix().getColNames()) {
            if (this._samplesVsEnvKeyword.containsKey(sample)) {
                // Keywords in _envProps are lower-cased, so the sample's keyword must be too;
                // comparing the raw value never matched keywords containing upper-case letters.
                String sampleKeyword = this._samplesVsEnvKeyword.get(sample).toString().toLowerCase();
                for (Map.Entry<String, List<Double>> keywordEntry : valuesByKeyword.entrySet()) {
                    double value = keywordEntry.getKey().equals(sampleKeyword) ? 1.0d : 0.0d;
                    keywordEntry.getValue().add(Double.valueOf(value));
                }
            } else {
                for (List<Double> values : valuesByKeyword.values()) {
                    values.add(Double.valueOf(Double.NaN));
                }
            }
        }
        this._featureMatrix = new Matrix(valuesByKeyword.size(), getMatrix().getMatrix().columns());
        int rowIndex = 0;
        for (Map.Entry<String, List<Double>> keywordEntry : valuesByKeyword.entrySet()) {
            this._featureMatrix.setRow(rowIndex, ArrayTools.m445toArray(keywordEntry.getValue()));
            this._featureMatrix.setRowName(rowIndex, keywordEntry.getKey());
            rowIndex++;
        }
        for (int column = 0; column < this._featureMatrix.getMatrix().columns(); column++) {
            this._featureMatrix.setColName(column, getMatrix().getColName(column));
        }
    }

    /**
     * Writes the OTU-to-lineage map to a tab-separated two-column file. Only logs a warning when
     * {@link #parse()} has not completed yet.
     *
     * @param str location of the output file
     */
    public void exportLineages(String str) {
        if (this._matrixBuilt) {
            ParserTools.exportMapToTwoColumnFile(this._otuVersusLineage, str, "\t");
        } else {
            this._logger.warn("Build matrix first!");
        }
    }

    /**
     * Sets the input folder, resets the non-zero counter and marks the matrix as not built.
     *
     * @param str folder that contains the input file
     */
    public void setFolder(String str) {
        this._folder = str;
        this._numberOfNonZeros = 0;
        this._matrixBuilt = false;
    }

    /** @return the input folder */
    public String getFolder() {
        return this._folder;
    }

    /** @param str clustering distance code, expected to be one of {@link #CLUSTERING_DISTANCES} */
    public void setOtuClusteringDistance(String str) {
        this._otuClusteringDistance = str;
    }

    /** @return the clustering distance code */
    public String getOtuClusteringDistance() {
        return this._otuClusteringDistance;
    }

    /** @param num row (OTU) minimum-occurrence threshold; values of zero or less disable it */
    public void setOtuMinOccurrenceFilter(Integer num) {
        this._otuMinOccurrenceFilter = num;
    }

    /** @return the row (OTU) minimum-occurrence threshold */
    public Integer getOtuMinOccurrenceFilter() {
        return this._otuMinOccurrenceFilter;
    }

    /** @param num column (sample) minimum-occurrence threshold; values of zero or less disable it */
    public void setSampleMinOccurrenceFilter(Integer num) {
        this._sampleMinOccurrenceFilter = num;
    }

    /** @return the column (sample) minimum-occurrence threshold */
    public Integer getSampleMinOccurrenceFilter() {
        return this._sampleMinOccurrenceFilter;
    }

    /** @param str location of the environment property file; empty disables the feature matrix */
    public void setEnvPropFileLocation(String str) {
        this._envPropFileLocation = str;
    }

    /** @return the location of the environment property file, empty when none is set */
    public String getEnvPropFileLocation() {
        return this._envPropFileLocation;
    }

    private void setMatrix(Matrix matrix) {
        this._matrix = matrix;
    }

    /** @return the presence/absence matrix; an empty default matrix before {@link #parse()} */
    public Matrix getMatrix() {
        return this._matrix;
    }

    private void setFeatureMatrix(Matrix matrix) {
        this._featureMatrix = matrix;
    }

    /** @return the environmental feature matrix; an empty default matrix when none was built */
    public Matrix getFeatureMatrix() {
        return this._featureMatrix;
    }

    /** @param z whether the row minimum-occurrence filter is also applied to the feature matrix */
    public void setFilterFeatureMatrix(boolean z) {
        this._filterFeatureMatrix = z;
    }

    /** @return whether the feature matrix is filtered */
    public boolean isFilterFeatureMatrix() {
        return this._filterFeatureMatrix;
    }

    /**
     * Returns a '#'-prefixed, multi-line report of the parameters and the resulting matrix
     * dimensions. The non-zero count refers to the matrix before filtering.
     */
    @Override
    public String toString() {
        StringBuilder report = new StringBuilder();
        report.append("# ").append("Chaffron global co-occurrence network 16S Data Parser\n");
        report.append("# Date=").append(new Date().toString()).append("\n");
        report.append("# PARAMETER\n");
        report.append("# OTU clustering distance=").append(getOtuClusteringDistance()).append("\n");
        report.append("# OTU minimum occurrence across samples=").append(getOtuMinOccurrenceFilter()).append("\n");
        report.append("# Sample minimum occurrence across OTUs=").append(getSampleMinOccurrenceFilter()).append("\n");
        report.append("# OUTPUT\n");
        report.append("# Row number=").append(this._matrix.getMatrix().rows()).append("\n");
        report.append("# Column number=").append(this._matrix.getMatrix().columns()).append("\n");
        report.append("# Number of non-zero entries=").append(this._numberOfNonZeros).append("\n");
        boolean filterConfigured = getOtuMinOccurrenceFilter().intValue() > 0 || getSampleMinOccurrenceFilter().intValue() > 0;
        // The filterer is still null when a threshold was raised only after parse() ran.
        if (this._matrixBuilt && filterConfigured && this._filterer != null) {
            report.append(this._filterer.toString()).append("\n");
        }
        if (!getEnvPropFileLocation().isEmpty()) {
            report.append("# Number of environmental features set=").append(this._envProps.size()).append("\n");
            report.append("# Environmental feature matrix filtered (only otu-minimum-occurrence is applied to feature matrix, where OTUs are features)=").append(isFilterFeatureMatrix()).append("\n");
            report.append("# Environmental feature matrix row number=").append(getFeatureMatrix().getMatrix().rows()).append("\n");
            report.append("# Environmental feature matrix column number=").append(getFeatureMatrix().getMatrix().columns()).append("\n");
        }
        return report.toString();
    }

    /**
     * Example run with hard-coded local paths: parses the data, writes the feature matrix to a
     * file in the working directory and prints the report.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        PresenceAbsenceMatrixGenerator generator = new PresenceAbsenceMatrixGenerator();
        generator.setFolder("/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/Chaffron/gg_sample_events_otus/");
        generator.setOtuClusteringDistance("15");
        generator.setEnvPropFileLocation("/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/Chaffron/gg_allseqs2envo.tsv");
        generator.setFilterFeatureMatrix(true);
        generator.setOtuMinOccurrenceFilter(4);
        generator.setSampleMinOccurrenceFilter(3);
        generator.parse();
        String nameSuffix = "-D" + generator.getOtuClusteringDistance() + "-Frows" + generator.getOtuMinOccurrenceFilter();
        generator.getFeatureMatrix().writeMatrix("featuresChaffron" + nameSuffix + ".txt", "\t", true, true);
        System.out.println(generator.toString());
    }
}
