package be.ac.vub.bsb.parsers.genomesize;

import be.ac.vub.bsb.parsers.ncbi.TaxonomyProvider;
import be.ac.vub.bsb.parsers.util.GenericObjectMatrixProcessor;
import java.io.IOException;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;
import org.ujmp.core.exceptions.MatrixException;

/* JADX WARN: Classes with same name are omitted:
  input_file:be/ac/vub/bsb/parsers/genomesize/GenomeSizeWrapper.class
 */
/* loaded from: input_file:lib/be_ac_vub_bsb_parsers.jar:be/ac/vub/bsb/parsers/genomesize/GenomeSizeWrapper.class */
/**
 * Drives the genome-size processing chain on top of {@link GenericObjectMatrixProcessor}.
 *
 * <p>{@link #createOutputTable()} runs, in order: {@code NCBIGenomeFilePreprocessor},
 * {@code GenomeSizeParser}, {@code GenomeSizeStatisticsProvider}, {@code GenomeSizeMerger} and,
 * when the input matrix is not empty, {@code TaxonToGenomeSizeLinker} followed by
 * {@code GenomeSizeToHigherTaxaAssigner}, whose output becomes this processor's output matrix.
 * {@link #toString()} renders a "#"-prefixed report of inputs, parameters, statistics and the
 * legend of data-source codes.
 *
 * <p>The exact semantics of each stage live in the helper classes and are not visible here.
 */
public class GenomeSizeWrapper extends GenericObjectMatrixProcessor {
    /** Location of the NCBI genome file (first constructor argument). */
    private final String _ncbiGenomeFileLocation;
    /** Column separator of the NCBI genome file (third constructor argument). */
    private final String _ncbiGenomeFileColSeparator;
    /** Location of the IMG genome file (second constructor argument). */
    private final String _imgGenomeFileLocation;
    /** Column separator of the IMG genome file (fourth constructor argument). */
    private final String _imgGenomeFileColSeparator;
    /** Optional NCBI sequence status file; an empty string means "not supplied". */
    private String _ncbiSequenceStatusFileLocation = "";
    /** Statistics text obtained from the statistics provider during the last run. */
    private String _statistics = "";
    /** Legend of data-source codes, rebuilt on every run. */
    private String _legendSource = "";
    private int _taxonIdColumn = 0;
    // -100 is the initial value of the three columns below; presumably an "unset" marker
    // interpreted by the downstream helper classes - TODO confirm.
    private int _taxonLineageColumn = -100;
    private int _taxonLevelColumn = -100;
    private int _taxonLineageRankColumn = -100;
    private String _taxonLineageSeparator = ",";
    /** Export target for the merged table; an empty string disables the export. */
    private String _mergedGenomeSizeTable = "";
    /** Export target for the taxon-linked table; an empty string disables the export. */
    private String _taxonGenomeSizeLinkedTable = "";
    private boolean _lowestLevelInTaxonTableIsSpecies = false;
    private String _mergingStrategy = GenomeSizeMerger.IN_DOUBT_MORE_RECENT_GENOME;
    private String _aggregationStrategyForHigherTaxa = GenomeSizeToHigherTaxaAssigner.MEDIAN;
    private String _maximumHigherTaxonRankToWhichGenomeSizeIsAssigned = TaxonomyProvider.SUPERKINGDOM;
    private boolean _assignHigherTaxaGenomeSizeToMissingSpeciesGenomeSize = false;
    private String _maximumReplacementTaxonRank = TaxonomyProvider.FAMILY;

    /**
     * Creates a wrapper for the given source files.
     *
     * <p>Note the argument order: both file locations come first, then both separators.
     *
     * @param ncbiGenomeFileLocation location of the NCBI genome file
     * @param imgGenomeFileLocation location of the IMG genome file
     * @param ncbiGenomeFileColSeparator column separator of the NCBI genome file
     * @param imgGenomeFileColSeparator column separator of the IMG genome file
     */
    public GenomeSizeWrapper(String ncbiGenomeFileLocation, String imgGenomeFileLocation,
            String ncbiGenomeFileColSeparator, String imgGenomeFileColSeparator) {
        this._ncbiGenomeFileLocation = ncbiGenomeFileLocation;
        this._ncbiGenomeFileColSeparator = ncbiGenomeFileColSeparator;
        this._imgGenomeFileLocation = imgGenomeFileLocation;
        this._imgGenomeFileColSeparator = imgGenomeFileColSeparator;
    }

    /** Runs the whole chain by delegating to {@link #createOutputTable()}. */
    @Override // be.ac.vub.bsb.parsers.util.IObjectMatrixProcessor
    public void processMatrix() {
        createOutputTable();
    }

    /**
     * Executes all processing stages, performs the optional table exports, sets the output
     * matrix (only when the input matrix is not empty) and rebuilds the data-source legend.
     *
     * <p>Export failures are reported via {@code printStackTrace()} and do not abort the run.
     */
    public void createOutputTable() {
        // The three-argument preprocessor constructor is used only when a status file was given.
        NCBIGenomeFilePreprocessor ncbiPreprocessor;
        if (getNcbiSequenceStatusFileLocation().isEmpty()) {
            ncbiPreprocessor = new NCBIGenomeFilePreprocessor(this._ncbiGenomeFileLocation, this._ncbiGenomeFileColSeparator);
        } else {
            ncbiPreprocessor = new NCBIGenomeFilePreprocessor(this._ncbiGenomeFileLocation, this._ncbiGenomeFileColSeparator, getNcbiSequenceStatusFileLocation());
        }
        ncbiPreprocessor.setLogger(super.getLogger());
        ncbiPreprocessor.setTreatTaxaWithoutSequenceStatusAsUnfinished(true);
        ncbiPreprocessor.processMatrix();
        Set<Integer> unfinishedTaxaIdentifiers = ncbiPreprocessor.getUnfinishedTaxaIdentifiers();
        getLogger().info("Unfinished NCBI genome identifiers: " + unfinishedTaxaIdentifiers.toString());

        // The parser receives the IMG file together with the preprocessed NCBI matrix.
        GenomeSizeParser parser = new GenomeSizeParser(this._imgGenomeFileLocation, this._imgGenomeFileColSeparator, ncbiPreprocessor.getOutputMatrix());
        parser.setLogger(super.getLogger());
        parser.processMatrix();

        GenomeSizeStatisticsProvider statisticsProvider = new GenomeSizeStatisticsProvider(parser.getOutputMatrix());
        statisticsProvider.computeStatistics();
        this._statistics = statisticsProvider.getStatistics();

        GenomeSizeMerger merger = new GenomeSizeMerger(parser.getOutputMatrix());
        merger.setMergeStrategy(getMergingStrategy());
        merger.setLogger(super.getLogger());
        merger.processMatrix();
        if (!getMergedGenomeSizeTable().isEmpty()) {
            // Best-effort export: a failure is printed and the run continues.
            try {
                merger.getOutputMatrix().exportToFile(getMergedGenomeSizeTable(), new Object[0]);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (MatrixException e) {
                e.printStackTrace();
            }
        }

        // Linking and higher-taxa assignment only happen when an input (taxon) matrix is present;
        // otherwise the output matrix is left untouched.
        if (!super.getInputMatrix().isEmpty()) {
            TaxonToGenomeSizeLinker linker = new TaxonToGenomeSizeLinker(super.getInputMatrix(), getTaxonIdColumn(), merger.getOutputMatrix());
            linker.setTaxonLevelColumn(getTaxonLevelColumn());
            linker.setLogger(super.getLogger());
            linker.setIdsOfTaxaWithUnfinsihedNCBIGenomes(unfinishedTaxaIdentifiers);
            linker.setStrainAggregationStrategy(getAggregationStrategyForHigherTaxa());
            linker.setLowestTaxonLevelInTaxonTableIsSpecies(isLowestLevelInTaxonTableIsSpecies());
            linker.processMatrix();
            if (!getTaxonGenomeSizeLinkedTable().isEmpty()) {
                // Best-effort export: a failure is printed and the run continues.
                try {
                    linker.getOutputMatrix().exportToFile(getTaxonGenomeSizeLinkedTable(), new Object[0]);
                } catch (IOException e) {
                    e.printStackTrace();
                } catch (MatrixException e) {
                    e.printStackTrace();
                }
            }

            GenomeSizeToHigherTaxaAssigner assigner = new GenomeSizeToHigherTaxaAssigner(linker.getOutputMatrix(), getTaxonIdColumn(), linker.getGenomeSizeColumn());
            assigner.setAggregatingStrategy(getAggregationStrategyForHigherTaxa());
            assigner.setMaxAllowedTaxonLevel(getMaximumHigherTaxonRankToWhichGenomeSizeIsAssigned());
            assigner.setTaxonRankColumn(getTaxonLevelColumn());
            assigner.setTaxonLineageColumn(getTaxonLineageColumn());
            assigner.setLineageMemberSeparator(getTaxonLineageSeparator());
            assigner.setFillMissingValuesWithNextHigherTaxonValue(isAssignHigherTaxaGenomeSizeToMissingSpeciesGenomeSize());
            // Lineage rank column and replacement rank are only passed on when filling is enabled.
            if (isAssignHigherTaxaGenomeSizeToMissingSpeciesGenomeSize()) {
                assigner.setLineageRankColumn(getTaxonLineageRankColumn());
                assigner.setMaximalReplacementTaxonRank(getMaximumReplacementTaxonRank());
            }
            assigner.setLogger(super.getLogger());
            assigner.processMatrix();
            super.setOutputMatrix(assigner.getOutputMatrix());
        }

        // Replace (rather than append to) the legend so that repeated runs do not duplicate it.
        this._legendSource = composeDataSourceLegend();
    }

    /** Builds the legend of data-source codes for the current configuration. */
    private String composeDataSourceLegend() {
        StringBuilder legend = new StringBuilder();
        legend.append(GenomeSizeMerger.IMG_NCBI_IDENTICAL_SOURCE).append("=genome size identical in NCBI and IMG\n");
        legend.append(GenomeSizeMerger.NCBI_DATA_SOURCE_ONLY).append("=genome only present in NCBI\n");
        legend.append(GenomeSizeMerger.IMG_DATA_SOURCE_ONLY).append("=genome only present in IMG\n");

        // Strategy-specific entries; an unrecognised strategy contributes none.
        String strategy = getMergingStrategy();
        if (strategy.equals(GenomeSizeMerger.IMG_NCBI_AVG_SOURCE)) {
            legend.append(GenomeSizeMerger.IMG_NCBI_AVG_SOURCE).append("=genome size averaged for NCBI and IMG (mean)\n");
        } else if (strategy.equals(GenomeSizeMerger.IN_DOUBT_MORE_RECENT_GENOME)) {
            legend.append(GenomeSizeMerger.NCBI_DATA_SOURCE_MORE_RECENT).append("=genome last update more recent in NCBI\n");
            legend.append(GenomeSizeMerger.IMG_DATA_SOURCE_MORE_RECENT).append("=genome last update more recent in IMG\n");
            legend.append(GenomeSizeMerger.IMG_NCBI_AVG_SOURCE).append("=genome size averaged for NCBI and IMG (mean), because dates are the same\n");
        } else if (strategy.equals(GenomeSizeMerger.IN_DOUBT_NCBI)) {
            legend.append(GenomeSizeMerger.NCBI_PREFERRED).append("=genome of different size present in IMG and NCBI, NCBI genome size selected\n");
        } else if (strategy.equals(GenomeSizeMerger.IN_DOUBT_IMG)) {
            // NOTE(review): this entry labels NCBI_PREFERRED as "IMG genome size selected"; it looks
            // like a copy-paste slip, but no IMG counterpart constant is visible here - confirm
            // against GenomeSizeMerger before changing.
            legend.append(GenomeSizeMerger.NCBI_PREFERRED).append("=genome of different size present in IMG and NCBI, IMG genome size selected\n");
        }

        legend.append(TaxonToGenomeSizeLinker.NCBI_GENOME_INCOMPLETE).append("=genome not completely sequenced in NCBI\n");
        legend.append(TaxonToGenomeSizeLinker.NOT_PRESENT_IN_IMG_OR_NCBI).append("=genome size not available via IMG finished genomes or NCBI\n");
        legend.append(TaxonToGenomeSizeLinker.HIGHER_LEVEL_TAXON)
                .append("=genome size of higher level taxon computed by taking the ")
                .append(getAggregationStrategyForHigherTaxa())
                .append(" of member taxa genome sizes\n");
        legend.append(TaxonToGenomeSizeLinker.CURRENT_NCBI).append("=genome size obtained from on-line query of current NCBI database \n");
        if (isLowestLevelInTaxonTableIsSpecies()) {
            legend.append(TaxonToGenomeSizeLinker.SPECIES_LEVEL_GENOME_SIZE_COMPUTED_FROM_BOTH_NCBI_AND_IMG).append("=genome size for species level was computed from strains in both IMG and NCBI \n");
            legend.append(TaxonToGenomeSizeLinker.SPECIES_LEVEL_GENOME_SIZE_COMPUTED_FTOM_IMG).append("=genome size for species level was computed from strains in IMG only \n");
            legend.append(TaxonToGenomeSizeLinker.SPECIES_LEVEL_GENOMNE_SIZE_COMPUTED_FROM_NCBI).append("=genome size for species level was computed from strains in NCBI only \n");
        }
        if (isAssignHigherTaxaGenomeSizeToMissingSpeciesGenomeSize()) {
            legend.append(GenomeSizeToHigherTaxaAssigner.GENOME_SIZE_SET_FROM_SUPER_TAXON).append("=genome size is set to the genome size of the super taxon to which this taxon belongs \n");
        }
        return legend.toString();
    }

    /** @param i index of the taxon identifier column, passed to the linker and the assigner */
    public void setTaxonIdColumn(int i) {
        this._taxonIdColumn = i;
    }

    /** @return index of the taxon identifier column (initially 0) */
    public int getTaxonIdColumn() {
        return this._taxonIdColumn;
    }

    /** @param i index of the taxon lineage column, passed to the higher-taxa assigner */
    public void setTaxonLineageColumn(int i) {
        this._taxonLineageColumn = i;
    }

    /** @return index of the taxon lineage column (initially -100) */
    public int getTaxonLineageColumn() {
        return this._taxonLineageColumn;
    }

    /** @param str separator between lineage members, passed to the higher-taxa assigner */
    public void setTaxonLineageSeparator(String str) {
        this._taxonLineageSeparator = str;
    }

    /** @return separator between lineage members (initially ",") */
    public String getTaxonLineageSeparator() {
        return this._taxonLineageSeparator;
    }

    /** @param i index of the taxon level column, passed to the linker and (as rank column) the assigner */
    public void setTaxonLevelColumn(int i) {
        this._taxonLevelColumn = i;
    }

    /** @return index of the taxon level column (initially -100) */
    public int getTaxonLevelColumn() {
        return this._taxonLevelColumn;
    }

    /** @param i index of the lineage rank column; only forwarded when missing-value filling is enabled */
    public void setTaxonLineageRankColumn(int i) {
        this._taxonLineageRankColumn = i;
    }

    /** @return index of the lineage rank column (initially -100) */
    public int getTaxonLineageRankColumn() {
        return this._taxonLineageRankColumn;
    }

    /** @param str merge strategy handed to {@code GenomeSizeMerger}; also selects legend entries */
    public void setMergingStrategy(String str) {
        this._mergingStrategy = str;
    }

    /** @return the configured merge strategy */
    public String getMergingStrategy() {
        return this._mergingStrategy;
    }

    /** @param str aggregation strategy handed to both the linker and the higher-taxa assigner */
    public void setAggregationStrategyForHigherTaxa(String str) {
        this._aggregationStrategyForHigherTaxa = str;
    }

    /** @return the configured aggregation strategy */
    public String getAggregationStrategyForHigherTaxa() {
        return this._aggregationStrategyForHigherTaxa;
    }

    /** @param str value passed to the assigner as its maximum allowed taxon level */
    public void setMaximumHigherTaxonRankToWhichGenomeSizeIsAssigned(String str) {
        this._maximumHigherTaxonRankToWhichGenomeSizeIsAssigned = str;
    }

    /** @return value passed to the assigner as its maximum allowed taxon level */
    public String getMaximumHigherTaxonRankToWhichGenomeSizeIsAssigned() {
        return this._maximumHigherTaxonRankToWhichGenomeSizeIsAssigned;
    }

    /** @param z whether the assigner is told to fill missing values from the next higher taxon */
    public void setAssignHigherTaxaGenomeSizeToMissingSpeciesGenomeSize(boolean z) {
        this._assignHigherTaxaGenomeSizeToMissingSpeciesGenomeSize = z;
    }

    /** @return whether missing-value filling from higher taxa is enabled */
    public boolean isAssignHigherTaxaGenomeSizeToMissingSpeciesGenomeSize() {
        return this._assignHigherTaxaGenomeSizeToMissingSpeciesGenomeSize;
    }

    /** @param str maximal replacement taxon rank; only forwarded when missing-value filling is enabled */
    public void setMaximumReplacementTaxonRank(String str) {
        this._maximumReplacementTaxonRank = str;
    }

    /** @return the configured maximal replacement taxon rank */
    public String getMaximumReplacementTaxonRank() {
        return this._maximumReplacementTaxonRank;
    }

    /** @param str location of the NCBI sequence status file; an empty string means none */
    public void setNcbiSequenceStatusFileLocation(String str) {
        this._ncbiSequenceStatusFileLocation = str;
    }

    /** @return location of the NCBI sequence status file, or an empty string when none was set */
    public String getNcbiSequenceStatusFileLocation() {
        return this._ncbiSequenceStatusFileLocation;
    }

    /** @param z flag forwarded to the linker; also adds the species-level legend entries */
    public void setLowestLevelInTaxonTableIsSpecies(boolean z) {
        this._lowestLevelInTaxonTableIsSpecies = z;
    }

    /** @return whether the lowest level in the taxon table is flagged as species */
    public boolean isLowestLevelInTaxonTableIsSpecies() {
        return this._lowestLevelInTaxonTableIsSpecies;
    }

    /** @param str export target for the merged table; an empty string disables the export */
    public void setMergedGenomeSizeTable(String str) {
        this._mergedGenomeSizeTable = str;
    }

    /** @return export target for the merged table, or an empty string when disabled */
    public String getMergedGenomeSizeTable() {
        return this._mergedGenomeSizeTable;
    }

    /** @param str export target for the taxon-linked table; an empty string disables the export */
    public void setTaxonGenomeSizeLinkedTable(String str) {
        this._taxonGenomeSizeLinkedTable = str;
    }

    /** @return export target for the taxon-linked table, or an empty string when disabled */
    public String getTaxonGenomeSizeLinkedTable() {
        return this._taxonGenomeSizeLinkedTable;
    }

    /**
     * Renders a "#"-prefixed report: current date, input files, first dimension of the input
     * matrix size, parameters, the statistics text and the data-source legend of the last run.
     *
     * @return the report text
     */
    @Override
    public String toString() {
        StringBuilder report = new StringBuilder();
        report.append("# ").append("Genome size to taxon assignment\n");
        report.append("# Date=").append(new Date().toString()).append("\n");
        report.append("# INPUT\n");
        report.append("# NCBI finished genomes source file=").append(this._ncbiGenomeFileLocation).append("\n");
        // The status-file line is only emitted when a status file was configured.
        if (!getNcbiSequenceStatusFileLocation().isEmpty()) {
            report.append("# NCBI genome sequence status file=").append(getNcbiSequenceStatusFileLocation()).append("\n");
        }
        report.append("# IMG finished genomes source file=").append(this._imgGenomeFileLocation).append("\n");
        report.append("# Taxon matrix taxon number=").append(super.getInputMatrix().getSize()[0]).append("\n");
        report.append("# PARAMETER\n");
        report.append("# NCBI/IMG genome size merge strategy=").append(getMergingStrategy()).append("\n");
        report.append("# Higher taxa member genome sizes aggregation strategy=").append(getAggregationStrategyForHigherTaxa()).append("\n");
        report.append("# Maximum taxon level up to which aggregated genome size is calculated=").append(getMaximumHigherTaxonRankToWhichGenomeSizeIsAssigned()).append("\n");
        report.append("# Taxon table lowest taxon level is species (instead of strain)=").append(isLowestLevelInTaxonTableIsSpecies()).append("\n");
        report.append("# Higher-level taxon genome size is assigned to species with missing genome size where possible=").append(isAssignHigherTaxaGenomeSizeToMissingSpeciesGenomeSize()).append("\n");
        report.append("# Higher-level taxon whose genome size is assigned to species with missing genome size has highest allowed rank=").append(getMaximumReplacementTaxonRank()).append("\n");
        report.append("# COMPARATIVE STATISTICS OF GENOME SIZES FOR IMG AND NCBI\n");
        report.append(this._statistics);
        report.append("# LEGEND FOR DATA SOURCE COLUMN\n");
        report.append(this._legendSource);
        return report.toString();
    }

    /**
     * Developer entry point: configures a wrapper with hard-coded local paths, runs it, exports
     * the output matrix to "taxonsWithGenomeSizes.txt" and prints the report.
     *
     * @param strArr command-line arguments (ignored)
     */
    public static void main(String[] strArr) {
        GenomeSizeWrapper wrapper = new GenomeSizeWrapper("/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/NCBI/NCBI_Microbial_Genomes/summary.csv", "/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/IMG/genome_export_finished_microbes_viruses_merged.csv", ";", ";");
        wrapper.setMergedGenomeSizeTable("mergedGenomeSizesNCBIAndIMG.txt");
        wrapper.setTaxonIdColumn(0);
        wrapper.setTaxonLevelColumn(1);
        wrapper.setTaxonLineageColumn(2);
        wrapper.setTaxonLineageRankColumn(3);
        wrapper.setTaxonLineageSeparator(",");
        wrapper.setNcbiSequenceStatusFileLocation("/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/NCBI/NCBI_Microbial_Genomes/lproks_0.txt");
        wrapper.setMergingStrategy(GenomeSizeMerger.IN_DOUBT_MORE_RECENT_GENOME);
        wrapper.setAggregationStrategyForHigherTaxa(GenomeSizeToHigherTaxaAssigner.MEDIAN);
        wrapper.setMaximumHigherTaxonRankToWhichGenomeSizeIsAssigned(TaxonomyProvider.SUPERKINGDOM);
        wrapper.setLowestLevelInTaxonTableIsSpecies(true);
        wrapper.setAssignHigherTaxaGenomeSizeToMissingSpeciesGenomeSize(true);
        wrapper.setMaximumReplacementTaxonRank(TaxonomyProvider.FAMILY);
        wrapper.processMatrix();
        // Best-effort export: a failure is printed and the report is still written to stdout.
        try {
            wrapper.getOutputMatrix().exportToFile("taxonsWithGenomeSizes.txt", new Object[0]);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (MatrixException e) {
            e.printStackTrace();
        }
        System.out.println(wrapper.toString());
    }
}
