package be.ac.vub.bsb.parsers.ncbi;

import be.ac.ulb.bigre.pathwayinference.core.io.IOTools;
import be.ac.ulb.bigre.pathwayinference.core.io.OneColumnSetParser;
import be.ac.ulb.bigre.pathwayinference.core.util.DiverseTools;
import java.io.File;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.log4j.Logger;
import org.htmlparser.parserapplications.StringExtractor;
import org.htmlparser.util.ParserException;

/* loaded from: input_file:be/ac/vub/bsb/parsers/ncbi/NCBIFTPClient.class */
/**
 * Downloads genome archives from the FTP server {@link #FTP_HOST}.
 *
 * <p>The genomes to fetch are read from a one-column file, either as accession
 * numbers or as organism names (which are first resolved to accession numbers
 * through {@code NCBIDBOnlineQueries}). For each genome the remote path is looked
 * up through the filesearching.com web search and the archive is then fetched
 * via {@code DiverseTools.downloadFileFromFTPServerAxel}.
 *
 * <p>The public static configuration fields are intentionally left non-final,
 * because external code may assign them.
 */
public class NCBIFTPClient {
    /** Entries read from the input file: accession numbers, or organism names if {@link #_names} is set. */
    private final Set<String> _genomesToDownload;
    /** Whether the entries of {@link #_genomesToDownload} are organism names instead of accession numbers. */
    private final boolean _names;
    /** Local target folder; an empty string means "use {@link #DEFAULT_LOCAL_FOLDER}". */
    private String _localGenomesFolder = "";
    private boolean _skipGenomesAlreadyPresentInFolder = false;
    /** Substring a remote path must contain to be accepted; an empty string disables the filter. */
    private String _remotePathFilter = "";
    private Logger _logger = Logger.getLogger(getClass().getPackage().toString());
    public static String FTP_HOST = "ftp.bio.net";
    public static String AXEL_PATH = "/opt/local/bin";
    public static String SCAFFOLD_FILE_SUFFIX = ".scaffold.fna.tgz";
    public static String CONTIG_FILE_SUFFIX = ".contig.fna.tgz";
    public static String DEFAULT_LOCAL_FOLDER = "ReferenceGenomes";

    /**
     * Creates a client for the genomes listed in the given file.
     *
     * @param str path of the one-column file that is parsed with {@code OneColumnSetParser}
     * @param z   {@code true} if the file lists organism names, {@code false} if it lists
     *            accession numbers
     */
    public NCBIFTPClient(String str, boolean z) {
        this._genomesToDownload = new OneColumnSetParser(str).parse();
        this._names = z;
    }

    /**
     * Downloads every configured genome into the local genomes folder.
     *
     * <p>The folder defaults to {@link #DEFAULT_LOCAL_FOLDER} when none was set and is
     * created when missing. If the folder cannot be created or is not writable, a fatal
     * message is logged and the JVM is terminated with exit code -1. For each genome the
     * scaffold archive is preferred; the contig archive is used when no scaffold archive
     * is found.
     */
    public void downloadGenomes() {
        if (this._localGenomesFolder.isEmpty()) {
            setLocalGenomesFolder(DEFAULT_LOCAL_FOLDER);
        }
        File folder = new File(getLocalGenomesFolder());
        if (!folder.exists()) {
            // mkdirs() also returns false when the directory appeared in the meantime,
            // hence the additional isDirectory() check.
            if (!folder.mkdirs() && !folder.isDirectory()) {
                this._logger.fatal("Cannot create folder " + getLocalGenomesFolder() + "!");
                System.exit(-1);
            }
        } else if (!folder.canWrite()) {
            this._logger.fatal("Cannot write into folder " + getLocalGenomesFolder() + "!");
            System.exit(-1);
        }
        for (String entry : this._genomesToDownload) {
            String accession = entry;
            if (this._names) {
                accession = NCBIDBOnlineQueries.getRefSeqGenomeAccessionNumberGivenOrganismName(entry);
            }
            // An empty accession would make the suffix match in getFTPPathToGenome accept
            // any archive on the results page, so such entries are skipped.
            if (accession == null || accession.isEmpty()) {
                this._logger.warn("No genome accession available for entry '" + entry + "', skipping it.");
                continue;
            }
            if (isSkipGenomesAlreadyPresentInFolder() && isGenomePresentLocally(accession)) {
                this._logger.info("Skipping genome " + accession + ".");
                continue;
            }
            this._logger.info("Downloading genome " + accession + "...");
            String remotePath = getFTPPathToGenome(accession, false);
            if (remotePath.isEmpty()) {
                remotePath = getFTPPathToGenome(accession, true);
            }
            if (remotePath.isEmpty()) {
                this._logger.warn("Could not find genome " + accession + " on FTP server " + FTP_HOST + " using the file search utility.");
            } else {
                wgetFile(remotePath);
            }
        }
    }

    /** Reports whether a scaffold or contig archive of the genome is already in the local folder. */
    private boolean isGenomePresentLocally(String accession) {
        return IOTools.filePresentInDirectory(accession + SCAFFOLD_FILE_SUFFIX, getLocalGenomesFolder())
                || IOTools.filePresentInDirectory(accession + CONTIG_FILE_SUFFIX, getLocalGenomesFolder());
    }

    /**
     * Looks up the path of a genome archive on {@link #FTP_HOST} via the filesearching.com
     * web search.
     *
     * <p>The strings extracted from the search-results page are scanned line by line for a
     * line that ends with the accession followed by the archive suffix and, if a remote
     * path filter is set, contains that filter. When several lines qualify, the last one
     * wins.
     *
     * @param str genome accession number used as search term and file-name prefix
     * @param z   {@code true} to look for the contig archive, {@code false} for the
     *            scaffold archive
     * @return the trimmed part of the matching line that follows the last occurrence of
     *         {@link #FTP_HOST} (the whole trimmed line if the host does not occur in it),
     *         or an empty string if nothing matched or the page could not be parsed
     */
    public String getFTPPathToGenome(String str, boolean z) {
        String match = "";
        StringExtractor stringExtractor = new StringExtractor("http://www.filesearching.com/cgi-bin/s?q=" + str + "&t=f&d=&x=0&y=0&l=en");
        String fileName = str + (z ? CONTIG_FILE_SUFFIX : SCAFFOLD_FILE_SUFFIX);
        try {
            for (String line : stringExtractor.extractStrings(false).split("\n")) {
                if (line.endsWith(fileName)
                        && (getRemotePathFilter().isEmpty() || line.contains(getRemotePathFilter()))) {
                    match = line;
                }
            }
        } catch (ParserException e) {
            this._logger.error("Could not parse the file search results for genome " + str + ".", e);
        }
        if (!match.isEmpty()) {
            // Literal search: String.split would interpret the dots of the host name as
            // regular-expression wildcards.
            int hostStart = match.lastIndexOf(FTP_HOST);
            if (hostStart >= 0) {
                match = match.substring(hostStart + FTP_HOST.length());
            }
            match = match.trim();
        }
        return match;
    }

    /**
     * Downloads the given remote file from {@link #FTP_HOST} into the local genomes folder,
     * keeping its file name.
     *
     * @param str path of the file on the FTP server
     */
    public void wgetFile(String str) {
        DiverseTools.downloadFileFromFTPServerAxel(FTP_HOST, str, IOTools.getFileWithoutDir(str), getLocalGenomesFolder(), AXEL_PATH, true);
    }

    /** @param str local folder the genomes are downloaded into */
    public void setLocalGenomesFolder(String str) {
        this._localGenomesFolder = str;
    }

    /** @return local folder the genomes are downloaded into */
    public String getLocalGenomesFolder() {
        return this._localGenomesFolder;
    }

    /** @param str substring a remote path must contain to be accepted; empty to accept any path */
    public void setRemotePathFilter(String str) {
        this._remotePathFilter = str;
    }

    /** @return substring a remote path must contain to be accepted; empty if no filter is set */
    public String getRemotePathFilter() {
        return this._remotePathFilter;
    }

    /** @param z whether genomes whose archive is already in the local folder are skipped */
    public void setSkipGenomesAlreadyPresentInFolder(boolean z) {
        this._skipGenomesAlreadyPresentInFolder = z;
    }

    /** @return whether genomes whose archive is already in the local folder are skipped */
    public boolean isSkipGenomesAlreadyPresentInFolder() {
        return this._skipGenomesAlreadyPresentInFolder;
    }

    /**
     * Example run with hard-coded settings: downloads the genomes listed in
     * {@code orgFile.txt}, skipping those already present in the target folder.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        NCBIFTPClient nCBIFTPClient = new NCBIFTPClient("orgFile.txt", false);
        nCBIFTPClient.setLocalGenomesFolder("/Users/karoline/Documents/Documents_Karoline/BSB_Lab/Data/MetaHIT/ReadMapping/ReferenceGenomesAssembledFromDifferentSources/MetaHIT_RefGenomes");
        nCBIFTPClient.setRemotePathFilter("HUMAN_MICROBIOM");
        nCBIFTPClient.setSkipGenomesAlreadyPresentInFolder(true);
        nCBIFTPClient.downloadGenomes();
    }
}
