package be.ac.ulb.bigre.metabolicdatabase.core;

import RSATWS.GeneInfoRequest;
import RSATWS.RSATWSPortType;
import RSATWS.RSATWebServicesLocator;
import be.ac.ulb.bigre.keggclient.client.KeggClient;
import be.ac.ulb.bigre.metabolicdatabase.pojos.Bioentity;
import be.ac.ulb.bigre.metabolicdatabase.pojos.Compound;
import be.ac.ulb.bigre.metabolicdatabase.pojos.Ecnumber;
import be.ac.ulb.bigre.metabolicdatabase.pojos.Gene;
import be.ac.ulb.bigre.metabolicdatabase.pojos.Organism;
import be.ac.ulb.bigre.metabolicdatabase.pojos.Pathway;
import be.ac.ulb.bigre.metabolicdatabase.pojos.Polypeptide;
import be.ac.ulb.bigre.metabolicdatabase.pojos.Reaction;
import be.ac.ulb.bigre.metabolicdatabase.pojos.Subreaction;
import be.ac.ulb.bigre.metabolicdatabase.pojos.Transformation;
import be.ac.ulb.bigre.metabolicdatabase.util.DataLoadingHelper;
import be.ac.ulb.bigre.metabolicdatabase.util.MetabolicDBObjectPrinter;
import be.ac.ulb.bigre.pathwayinference.core.PathwayinferenceConstants;
import be.ac.ulb.bigre.pathwayinference.core.util.DiverseTools;
import cern.colt.matrix.impl.AbstractFormatter;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.parserapplications.StringExtractor;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.ParserException;

/* loaded from: input_file:lib/be_ac_ulb_bigre_metabolicdatabase.jar:be/ac/ulb/bigre/metabolicdatabase/core/MetabolicDBHelperTools.class */
public class MetabolicDBHelperTools {
    private static final String SEQUENCE_PARTICIPANT = "sequenceParticipant";
    private static final String SEQUENCE_INTERVAL = "sequenceInterval";
    private static final String SEQUENCE_FEATURE = "sequenceFeature";
    private static final String SEQUENCE_FEATURE_LIST = "SEQUENCE-FEATURE-LIST";
    private static final String SEQUENCE_SITE = "sequenceSite";
    private static final String BIOPAX_LEVEL_2 = "biopax-level2.owl";
    private static final String BIOPAX_LEVEL_1 = "biopax-level1.owl";

    /**
     * Builds a polypeptide for a gene by scraping the BioCyc ECOLI web pages.
     * <p>
     * The gene search page is fetched first; the first HTTP link whose target contains
     * {@code type=GENE&object=} is used as the gene page (if none is found, the search URL
     * itself is parsed a second time). On that page every {@code type=REACTION} link becomes a
     * {@link Reaction} (name and BioCyc id taken from the link target, equation from the link
     * text) and a {@code type=ENZYME} link supplies the polypeptide name (link text) and
     * synonyms (object id). Any extracted text line containing {@code Synonyms:} is used for
     * the gene synonyms. Gene, polypeptide and reactions are cross-linked and the result is
     * passed to {@link MetabolicDBObjectPrinter#printCatalysis}.
     * <p>
     * URL, I/O and parser exceptions are reported with {@code printStackTrace()} and the
     * polypeptide built so far is returned.
     *
     * @param str  gene name; used in the search query and as the gene's name and
     *             organism-specific database id
     * @param str2 not used by this method
     * @return the polypeptide, possibly only partially populated if an error occurred
     */
    public static Polypeptide getBiocycProteinInfo(String str, String str2) {
        Polypeptide polypeptide = new Polypeptide();
        Gene gene = new Gene();
        gene.setName(str);
        gene.setOrganismSpecificDbId(str);
        Set<Reaction> reactions = new HashSet<Reaction>();
        Parser parser = new Parser();
        String pageUrl = "http://biocyc.org/ECOLI/search-query?type=GENE&gname=" + str;
        try {
            // Step 1: resolve the search result to the gene page.
            parser.setConnection((HttpURLConnection) new URL(pageUrl).openConnection());
            for (Node node : parser.parse(new NodeClassFilter(LinkTag.class)).toNodeArray()) {
                LinkTag searchLink = (LinkTag) node;
                if (searchLink.isHTTPLink()) {
                    String target = searchLink.getLink();
                    if (target.contains("type=GENE&object=")) {
                        pageUrl = target;
                        break;
                    }
                }
            }

            // Step 2: collect reactions and the enzyme from the links of the gene page.
            parser.setConnection((HttpURLConnection) new URL(pageUrl).openConnection());
            for (Node node : parser.parse(new NodeClassFilter(LinkTag.class)).toNodeArray()) {
                LinkTag pageLink = (LinkTag) node;
                if (!pageLink.isHTTPLink()) {
                    continue;
                }
                String target = pageLink.getLink();
                if (target.contains("type=REACTION")) {
                    // A link may mention type=REACTION without carrying an object id;
                    // such links are skipped instead of failing with an index error.
                    String[] reactionParts = target.split("type=REACTION&object=");
                    if (reactionParts.length > 1) {
                        String reactionId = reactionParts[1];
                        Reaction reaction = new Reaction();
                        reaction.setName(reactionId);
                        reaction.setBiocycUniqueId(reactionId);
                        reaction.setEquation(pageLink.getLinkText());
                        reactions.add(reaction);
                    }
                }
                if (target.contains("type=ENZYME")) {
                    String[] enzymeParts = target.split("type=ENZYME&object=");
                    if (enzymeParts.length > 1) {
                        polypeptide.setName(pageLink.getLinkText());
                        polypeptide.setSynonyms(enzymeParts[1]);
                    }
                }
            }

            // Step 3: gene synonyms come from the plain text of the same page.
            for (String textLine : new StringExtractor(pageUrl).extractStrings(false).split("\n")) {
                if (textLine.contains("Synonyms:")) {
                    gene.setSynonyms(DataLoadingHelper.setToString(DiverseTools.stringToSet(textLine, ", ")));
                }
            }

            // Step 4: cross-link gene, polypeptide and reactions.
            polypeptide.getGenes().add(gene);
            gene.getPolypeptides().add(polypeptide);
            polypeptide.getReactions().addAll(reactions);
            for (Reaction reaction : reactions) {
                reaction.getPolypeptides().add(polypeptide);
            }
            MetabolicDBObjectPrinter.printCatalysis(polypeptide);
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e2) {
            e2.printStackTrace();
        } catch (ParserException e3) {
            e3.printStackTrace();
        }
        return polypeptide;
    }

    /**
     * Extracts the genes listed for the given organisms on a KEGG DBGET entry page.
     * <p>
     * The page {@code http://www.genome.jp/dbget-bin/www_bget?<str>} is converted to text and
     * scanned line by line. Once a line starting with {@code Genes} has been seen, every
     * following line that starts with an upper-cased organism code followed by
     * {@code MetabolicDatabaseConstants.CODE_SEPARATOR} is parsed: the text after
     * {@code "<CODE>: "} is split on {@code AbstractFormatter.DEFAULT_COLUMN_SEPARATOR} and
     * each token becomes a {@link Gene}. A token of the form {@code name(synonym)} yields both
     * name and synonyms; any other token is used as the name only.
     * <p>
     * If no gene is found this way, the KO identifiers of the entry are looked up with
     * {@link #getKEGGKO(String)} and the genes are collected via
     * {@link #getKEGGGeneViaKO(String, Set)}. Parser errors are printed with
     * {@code printStackTrace()}.
     *
     * @param str KEGG entry identifier appended to the DBGET URL
     * @param set organism codes to look for (compared upper-cased)
     * @return the genes found; empty if none
     */
    public static Set<Gene> getKEGGGene(String str, Set<String> set) {
        Set<Gene> genes = new HashSet<Gene>();
        boolean genesSectionSeen = false;
        try {
            String pageText = new StringExtractor("http://www.genome.jp/dbget-bin/www_bget?" + str).extractStrings(false);
            for (String line : pageText.split("\n")) {
                if (genesSectionSeen) {
                    for (String organism : set) {
                        String organismCode = organism.toUpperCase();
                        if (!line.startsWith(organismCode + MetabolicDatabaseConstants.CODE_SEPARATOR)) {
                            continue;
                        }
                        String[] lineParts = line.split(organismCode + ": ");
                        if (lineParts.length < 2) {
                            // Organism prefix without a gene list: nothing to parse.
                            continue;
                        }
                        String geneList = lineParts[1];
                        String[] tokens = geneList.contains(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR)
                                ? geneList.split(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR)
                                : new String[]{geneList};
                        for (String token : tokens) {
                            Gene gene = new Gene();
                            if (token.contains("(")) {
                                // split drops trailing empty strings, so "abc(" yields one
                                // element and "(" yields none; guard both index accesses.
                                String[] nameAndSynonym = token.split("\\(");
                                gene.setName(nameAndSynonym.length > 0 ? nameAndSynonym[0].trim() : "");
                                if (nameAndSynonym.length > 1) {
                                    gene.setSynonyms(nameAndSynonym[1].replace(")", "").trim());
                                }
                            } else {
                                gene.setName(token.trim());
                            }
                            genes.add(gene);
                        }
                    }
                }
                if (line.startsWith("Genes")) {
                    genesSectionSeen = true;
                }
            }
        } catch (ParserException e) {
            e.printStackTrace();
        }
        if (genes.isEmpty()) {
            // Fall back to the genes attached to the entry's KO groups.
            for (String koId : getKEGGKO(str)) {
                genes.addAll(getKEGGGeneViaKO(koId, set));
            }
        }
        return genes;
    }

    /**
     * Extracts the genes listed for the given organisms on the KEGG DBGET page of a KO entry.
     * <p>
     * The page {@code http://www.genome.jp/dbget-bin/www_bget?ko:<str>} is converted to text
     * and scanned line by line. After a line starting with {@code Genes} has been seen, each
     * following line that starts with an upper-cased organism code followed by
     * {@code MetabolicDatabaseConstants.CODE_SEPARATOR} is parsed: the text after
     * {@code "<CODE>: "} is split on {@code AbstractFormatter.DEFAULT_COLUMN_SEPARATOR} and
     * every token becomes a {@link Gene} ({@code name(synonym)} tokens supply both name and
     * synonyms). Parser errors are printed with {@code printStackTrace()}.
     *
     * @param str KO identifier (without the {@code ko:} prefix)
     * @param set organism codes to look for (compared upper-cased)
     * @return the genes found; empty if none
     */
    public static Set<Gene> getKEGGGeneViaKO(String str, Set<String> set) {
        Set<Gene> genes = new HashSet<Gene>();
        boolean genesSectionSeen = false;
        try {
            String pageText = new StringExtractor("http://www.genome.jp/dbget-bin/www_bget?ko:" + str).extractStrings(false);
            for (String line : pageText.split("\n")) {
                if (genesSectionSeen) {
                    for (String organism : set) {
                        String organismCode = organism.toUpperCase();
                        if (!line.startsWith(organismCode + MetabolicDatabaseConstants.CODE_SEPARATOR)) {
                            continue;
                        }
                        String[] lineParts = line.split(organismCode + ": ");
                        if (lineParts.length < 2) {
                            // Organism prefix without a gene list: nothing to parse.
                            continue;
                        }
                        String geneList = lineParts[1];
                        String[] tokens = geneList.contains(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR)
                                ? geneList.split(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR)
                                : new String[]{geneList};
                        for (String token : tokens) {
                            Gene gene = new Gene();
                            if (token.contains("(")) {
                                // split drops trailing empty strings, so "abc(" yields one
                                // element and "(" yields none; guard both index accesses.
                                String[] nameAndSynonym = token.split("\\(");
                                gene.setName(nameAndSynonym.length > 0 ? nameAndSynonym[0].trim() : "");
                                if (nameAndSynonym.length > 1) {
                                    gene.setSynonyms(nameAndSynonym[1].replace(")", "").trim());
                                }
                            } else {
                                gene.setName(token.trim());
                            }
                            genes.add(gene);
                        }
                    }
                }
                if (line.startsWith("Genes")) {
                    genesSectionSeen = true;
                }
            }
        } catch (ParserException e) {
            e.printStackTrace();
        }
        return genes;
    }

    /**
     * Collects the KO identifiers shown on a KEGG DBGET entry page.
     * <p>
     * The page {@code http://www.genome.jp/dbget-bin/www_bget?<str>} is converted to text.
     * After a line starting with {@code Orthology} has been seen, every following line that
     * starts with {@code "KO: "} contributes the text after that prefix, cut at the first
     * {@code AbstractFormatter.DEFAULT_COLUMN_SEPARATOR} if one is present. Parser errors are
     * printed with {@code printStackTrace()}.
     *
     * @param str KEGG entry identifier appended to the DBGET URL
     * @return the KO identifiers found; empty if none
     */
    public static Set<String> getKEGGKO(String str) {
        Set<String> koIds = new HashSet<String>();
        try {
            String pageText = new StringExtractor("http://www.genome.jp/dbget-bin/www_bget?" + str).extractStrings(false);
            boolean orthologySeen = false;
            for (String line : pageText.split("\n")) {
                if (orthologySeen && line.startsWith("KO: ")) {
                    String koText = line.split("KO: ")[1];
                    // Keep only the identifier when a description follows it.
                    koIds.add(koText.contains(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR)
                            ? koText.split(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR)[0]
                            : koText);
                }
                orthologySeen = orthologySeen || line.startsWith("Orthology");
            }
        } catch (ParserException e) {
            e.printStackTrace();
        }
        return koIds;
    }

    /**
     * Builds a polypeptide (with its gene and EC numbers) from the KEGG API flat-file record
     * returned by {@link KeggClient#getGeneInfo}.
     * <p>
     * Outside the amino-acid sequence section the following are read from the record lines:
     * gene synonyms (line starting with {@code NAME}), NCBI gene id, the organism-specific
     * database id (field chosen by {@link #getOrganismSpecificFieldForGivenOrg(String)}),
     * the UniProt id and the definition, which becomes the polypeptide name after being cut
     * at the first {@code ;} or {@code ", "}, or otherwise at {@code that},
     * {@code required for} or {@code involved in}. Lines between the
     * {@code KEGG_AASEQ_FIELD} header and the {@code KEGG_NTSEQ_FIELD} header are trimmed and
     * concatenated into the sequence. EC numbers come from {@link KeggClient#getEcsByGene}.
     *
     * @param str  KEGG gene identifier passed to the KEGG client
     * @param str2 organism code used to select the organism-specific database field
     * @return the polypeptide, linked to its gene and EC numbers
     */
    public static Polypeptide getGeneInfoFromKEGGAPI(String str, String str2) {
        Set<Ecnumber> ecNumbers = new HashSet<Ecnumber>();
        Gene gene = new Gene();
        Polypeptide polypeptide = new Polypeptide();
        StringBuilder sequence = new StringBuilder();
        boolean inAaSequence = false;
        String[] recordLines = KeggClient.getGeneInfo(str).split("\n");
        // The field name depends only on the organism, so look it up once, not per line.
        String organismField = getOrganismSpecificFieldForGivenOrg(str2);
        for (String line : recordLines) {
            if (!inAaSequence) {
                if (line.startsWith("NAME")) {
                    gene.setSynonyms(line.split("NAME")[1].trim());
                }
                if (line.contains(MetabolicDatabaseConstants.KEGG_NCBI_GENEID_FIELD)) {
                    gene.setNcbiGeneId(line.split("NCBI-GeneID:")[1].trim());
                }
                if (!organismField.equals("") && line.contains(organismField)) {
                    gene.setOrganismSpecificDbId(line.split(organismField + MetabolicDatabaseConstants.CODE_SEPARATOR)[1].trim());
                }
                if (line.contains(MetabolicDatabaseConstants.UNIPROT)) {
                    polypeptide.setUniprotId(line.split("UniProt:")[1].trim());
                }
            }
            // The nucleotide header ends the amino-acid section before anything is appended.
            if (line.startsWith(MetabolicDatabaseConstants.KEGG_NTSEQ_FIELD)) {
                inAaSequence = false;
            }
            if (inAaSequence) {
                sequence.append(line.trim());
            }
            // The amino-acid header itself is not part of the sequence, hence set afterwards.
            if (line.startsWith(MetabolicDatabaseConstants.KEGG_AASEQ_FIELD) && !inAaSequence) {
                inAaSequence = true;
            }
            if (line.startsWith(MetabolicDatabaseConstants.KEGG_DEFINITION_FIELD) && !inAaSequence) {
                String definition = line.split(MetabolicDatabaseConstants.KEGG_DEFINITION_FIELD)[1].trim();
                if (definition.contains(";") || definition.contains(", ")) {
                    definition = definition.split("([;]|[,]{1}[ ]{1})")[0];
                } else if (definition.contains("that") || definition.contains("required for") || definition.contains("involved in")) {
                    definition = definition.split("(required for|that|involved in)")[0];
                }
                polypeptide.setName(definition);
            }
        }
        polypeptide.setSequence(sequence.toString());
        for (String ecEntry : KeggClient.getEcsByGene(str)) {
            Ecnumber ecnumber = new Ecnumber();
            ecnumber.setEcNumber(ecEntry.split(MetabolicDatabaseConstants.CODE_SEPARATOR)[1]);
            ecNumbers.add(ecnumber);
        }
        for (Ecnumber ecnumber : ecNumbers) {
            ecnumber.getPolypeptides().add(polypeptide);
        }
        polypeptide.getEcnumbers().addAll(ecNumbers);
        gene.getPolypeptides().add(polypeptide);
        polypeptide.getGenes().add(gene);
        return polypeptide;
    }

    /**
     * Builds a polypeptide (with its gene and EC numbers) by scraping the KEGG DBGET HTML
     * page {@code http://www.genome.jp/dbget-bin/www_bget?<str2>+<str>}.
     * <p>
     * The page text is scanned line by line:
     * <ul>
     * <li>a line containing {@code [EC:} supplies one or more EC numbers;</li>
     * <li>the line following the first {@code Definition} line becomes the polypeptide name;</li>
     * <li>the line following the first {@code Gene name} line supplies the gene synonyms;</li>
     * <li>the line following the first {@code Entry} line supplies the gene name (text before
     *     the first column separator);</li>
     * <li>{@code UniProt: }, the NCBI gene id field and the organism-specific field (see
     *     {@link #getOrganismSpecificFieldForGivenOrg(String)}) supply the respective ids;</li>
     * <li>lines after {@code AA seq} and before {@code NT seq} form the amino-acid sequence;
     *     the first of them is skipped when the sequence is assembled.</li>
     * </ul>
     * The organism-specific id is set to the empty string while no line carrying the field has
     * been seen, and is kept once found. Parser errors are printed with
     * {@code printStackTrace()}.
     *
     * @param str  gene identifier within the organism
     * @param str2 KEGG organism code
     * @return the polypeptide, linked to its gene and EC numbers
     */
    public static Polypeptide getGeneInfoFromKEGGHTML(String str, String str2) {
        Polypeptide polypeptide = new Polypeptide();
        Set<Ecnumber> ecNumbers = new HashSet<Ecnumber>();
        Gene gene = new Gene();
        String pageUrl = "http://www.genome.jp/dbget-bin/www_bget?" + str2 + "+" + str;
        // The field name depends only on the organism, so look it up once, not per line.
        String organismField = getOrganismSpecificFieldForGivenOrg(str2);
        boolean entrySeen = false;
        boolean geneNameSet = false;
        boolean synonymsSet = false;
        boolean definitionSeen = false;
        boolean geneNameHeaderSeen = false;
        boolean aaSeqSeen = false;
        boolean ntSeqSeen = false;
        boolean polypeptideNameSet = false;
        boolean organismIdFound = false;
        List<String> aaSeqLines = new ArrayList<String>();
        try {
            for (String line : new StringExtractor(pageUrl).extractStrings(false).split("\n")) {
                if (line.contains("[EC:")) {
                    String ecList = line.split("EC:")[1].trim();
                    if (ecList.endsWith("]")) {
                        ecList = ecList.replace("]", "");
                    }
                    String[] ecTokens = ecList.contains(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR)
                            ? ecList.split(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR)
                            : new String[]{ecList};
                    for (String ecToken : ecTokens) {
                        Ecnumber ecnumber = new Ecnumber();
                        ecnumber.setEcNumber(ecToken.trim());
                        ecNumbers.add(ecnumber);
                    }
                }
                // Each "value" check runs before the matching "header" check below, so a
                // value is always taken from the line that follows its header line.
                if (definitionSeen && !polypeptideNameSet) {
                    polypeptide.setName(line.trim());
                    polypeptideNameSet = true;
                }
                if (geneNameHeaderSeen && !synonymsSet) {
                    gene.setSynonyms(DataLoadingHelper.setToString(DiverseTools.stringToSet(line, ", ")));
                    synonymsSet = true;
                }
                if (entrySeen && !geneNameSet) {
                    gene.setName(line.split(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR)[0]);
                    geneNameSet = true;
                }
                if (line.startsWith("Entry")) {
                    entrySeen = true;
                }
                if (line.startsWith("Definition")) {
                    definitionSeen = true;
                }
                if (line.startsWith("Gene name")) {
                    geneNameHeaderSeen = true;
                }
                if (line.contains("UniProt: ")) {
                    polypeptide.setUniprotId(line.split("UniProt: ")[1].trim());
                }
                if (line.contains(MetabolicDatabaseConstants.KEGG_NCBI_GENEID_FIELD)) {
                    gene.setNcbiGeneId(line.split("NCBI-GeneID: ")[1].trim());
                }
                if (!organismField.equals("")) {
                    String[] idParts = line.split(organismField + MetabolicDatabaseConstants.CODE_SEPARATOR);
                    if (idParts.length > 1) {
                        gene.setOrganismSpecificDbId(idParts[1].trim());
                        organismIdFound = true;
                    } else if (!organismIdFound) {
                        // Keep the "empty when absent" default, but never overwrite an id
                        // that an earlier line already provided.
                        gene.setOrganismSpecificDbId("");
                    }
                }
                if (line.startsWith("NT seq")) {
                    ntSeqSeen = true;
                }
                if (aaSeqSeen && !ntSeqSeen) {
                    aaSeqLines.add(line);
                }
                if (line.startsWith("AA seq")) {
                    aaSeqSeen = true;
                }
            }
        } catch (ParserException e2) {
            e2.printStackTrace();
        }
        // Index 0 is deliberately skipped, as in the original implementation
        // (presumably the sequence-length line that follows the "AA seq" header - confirm).
        StringBuilder sequence = new StringBuilder();
        for (int i = 1; i < aaSeqLines.size(); i++) {
            sequence.append(aaSeqLines.get(i));
        }
        polypeptide.setSequence(sequence.toString());
        for (Ecnumber ecnumber : ecNumbers) {
            ecnumber.getPolypeptides().add(polypeptide);
        }
        polypeptide.getEcnumbers().addAll(ecNumbers);
        gene.getPolypeptides().add(polypeptide);
        polypeptide.getGenes().add(gene);
        return polypeptide;
    }

    /**
     * Returns the name of the organism-specific database field used for a KEGG organism code.
     * <p>
     * The lookup is case-sensitive. No collection is allocated per call, which matters because
     * the parsers in this class call this method while processing record lines.
     *
     * @param str KEGG organism code such as {@code "sce"} or {@code "eco"}; may be {@code null}
     * @return the database field name, or the empty string for {@code null} or unknown codes
     */
    public static String getOrganismSpecificFieldForGivenOrg(String str) {
        if (str == null) {
            // switch on a null String would throw; unknown codes map to "".
            return "";
        }
        switch (str) {
            case "sce":
                return "SGD";
            case "eco":
                return "EcoGene";
            case "hsa":
                return "HGNC";
            case "mmu":
            case "gga":
                return "Ensembl";
            case "rno":
                return "RATMAP";
            case "dme":
                return "FlyBase";
            case "cel":
                return "WormBase";
            case "ath":
                return "TAIR";
            case "dre":
                return "ZFIN";
            case "ddi":
                return "DictyBase";
            case "pfa":
                return "PlasmoDB";
            case "bsu":
                return "BSORF";
            default:
                return "";
        }
    }

    /**
     * Copies an OWL file to a new temporary-named {@code .owl} file, rewriting the BioPAX
     * level reference and dropping sequence-related lines.
     * <p>
     * Every input line is trimmed, occurrences of {@code biopax-level2.owl} are replaced by
     * {@code biopax-level1.owl}, and the line is written (followed by {@code "\n"}) unless it
     * contains one of the markers {@code sequenceParticipant}, {@code sequenceFeature},
     * {@code sequenceInterval}, {@code sequenceSite} or {@code SEQUENCE-FEATURE-LIST}.
     * <p>
     * Reader and writer are closed even when reading or writing fails; an
     * {@link IOException} is printed with {@code printStackTrace()} and the output path is
     * still returned.
     *
     * @param str  path of the OWL file to read
     * @param str2 output directory; if empty, the file name alone is used as output path
     * @return the path of the filtered file
     */
    public static String OwlFileFilter(String str, String str2) {
        String outputName = DiverseTools.getTempFileName() + ".owl";
        String outputPath = str2.equals("") ? outputName : str2 + PathwayinferenceConstants.PATH_SEPARATOR + outputName;
        try (BufferedReader reader = new BufferedReader(new FileReader(str));
                PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(outputPath)))) {
            String rawLine;
            while ((rawLine = reader.readLine()) != null) {
                String line = rawLine.trim().replace(BIOPAX_LEVEL_2, BIOPAX_LEVEL_1);
                // NOTE(review): the previous implementation kept one boolean per marker, set
                // it before the write and cleared it again in the same iteration, so the net
                // effect was exactly this per-line test. If whole elements (opening tag to
                // closing tag) were meant to be removed, that never worked - confirm intent.
                boolean mentionsSequenceElement = line.contains(SEQUENCE_PARTICIPANT)
                        || line.contains(SEQUENCE_FEATURE)
                        || line.contains(SEQUENCE_INTERVAL)
                        || line.contains(SEQUENCE_SITE)
                        || line.contains(SEQUENCE_FEATURE_LIST);
                if (!mentionsSequenceElement) {
                    writer.print(line + "\n");
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return outputPath;
    }

    /**
     * Filters OWL content held in a string: rewrites the BioPAX level reference and drops
     * sequence-related lines.
     * <p>
     * The input is split on {@code "\n"}; every line is trimmed, occurrences of
     * {@code biopax-level2.owl} are replaced by {@code biopax-level1.owl}, and the line is
     * appended to the result (followed by {@code "\n"}) unless it contains one of the markers
     * {@code sequenceParticipant}, {@code sequenceFeature}, {@code sequenceInterval},
     * {@code sequenceSite} or {@code SEQUENCE-FEATURE-LIST}.
     *
     * @param str OWL document text
     * @return the filtered text, one trimmed line per kept input line
     */
    public static String OwlFileFilter(String str) {
        StringBuilder filtered = new StringBuilder();
        for (String rawLine : str.split("\n")) {
            String line = rawLine.trim().replace(BIOPAX_LEVEL_2, BIOPAX_LEVEL_1);
            // NOTE(review): written as a per-line test because the earlier per-marker flags
            // were set and cleared within the same iteration, which amounts to exactly this.
            // Whether whole elements were meant to be skipped should be confirmed.
            boolean mentionsSequenceElement = line.contains(SEQUENCE_PARTICIPANT)
                    || line.contains(SEQUENCE_FEATURE)
                    || line.contains(SEQUENCE_INTERVAL)
                    || line.contains(SEQUENCE_SITE)
                    || line.contains(SEQUENCE_FEATURE_LIST);
            if (mentionsSequenceElement) {
                continue;
            }
            filtered.append(line).append("\n");
        }
        return filtered.toString();
    }

    /**
     * Picks the identifier of a reaction, subreaction or compound that matches a database
     * name.
     * <p>
     * The database name is compared case-insensitively (via {@code toLowerCase()}). For
     * reactions and compounds the rules are tried in this order: KEGG (only if the object has
     * a KEGG id), BioCyc/MetaCyc (only if it has a BioCyc id), Pathway Commons / PathCase /
     * {@code "unknown"} (name), Reactome (BioCyc id for reactions, PubChem id for compounds),
     * aMAZE (KEGG id). If no rule applies, a message is written to {@code System.err} and
     * the empty string is returned. For a {@link Subreaction} the KEGG id is returned
     * regardless of the database name. Any other object yields the empty string.
     *
     * @param obj the reaction, subreaction or compound
     * @param str the database name
     * @return the selected identifier, or the empty string if none was selected
     */
    public static String getIdentifierOfObject(Object obj, String str) {
        if (obj instanceof Reaction) {
            Reaction reaction = (Reaction) obj;
            String database = str.toLowerCase();
            if (database.contains("KEGG".toLowerCase()) && reaction.hasKeggId()) {
                return reaction.getKeggId();
            }
            if ((database.contains(MetabolicDatabaseConstants.BIOCYC.toLowerCase())
                    || database.contains(MetabolicDatabaseConstants.METACYC.toLowerCase()))
                    && reaction.hasBiocycUniqueId()) {
                return reaction.getBiocycUniqueId();
            }
            if (database.contains(MetabolicDatabaseConstants.PATHWAY_COMMONS.toLowerCase())
                    || database.contains(MetabolicDatabaseConstants.PATH_CASE.toLowerCase())
                    || database.equals("unknown".toLowerCase())) {
                return reaction.getName();
            }
            if (database.contains(MetabolicDatabaseConstants.REACTOME.toLowerCase())) {
                return reaction.getBiocycUniqueId();
            }
            if (database.contains(MetabolicDatabaseConstants.AMAZE.toLowerCase())) {
                return reaction.getKeggId();
            }
            System.err.println("Database identifier " + str + " is unknown for reaction " + reaction.getEquation() + "! If reaction provides KEGG identifier, it is set. If the reaction provides BioCyc identifier, it is set. Else, empty id is returned.");
            return "";
        }
        if (obj instanceof Subreaction) {
            return ((Subreaction) obj).getKeggId();
        }
        if (obj instanceof Compound) {
            Compound compound = (Compound) obj;
            String database = str.toLowerCase();
            if (database.contains("KEGG".toLowerCase()) && compound.hasKeggId()) {
                return compound.getKeggId();
            }
            if ((database.contains(MetabolicDatabaseConstants.BIOCYC.toLowerCase())
                    || database.contains(MetabolicDatabaseConstants.METACYC.toLowerCase()))
                    && compound.hasBiocycUniqueId()) {
                return compound.getBiocycUniqueId();
            }
            if (database.contains(MetabolicDatabaseConstants.PATHWAY_COMMONS.toLowerCase())
                    || database.contains(MetabolicDatabaseConstants.PATH_CASE.toLowerCase())
                    || database.equals("unknown".toLowerCase())) {
                return compound.getName();
            }
            if (database.contains(MetabolicDatabaseConstants.REACTOME.toLowerCase())) {
                return compound.getPubchemId();
            }
            if (database.contains(MetabolicDatabaseConstants.AMAZE.toLowerCase())) {
                return compound.getKeggId();
            }
            System.err.println("Database identifier " + str + " is unknown for compound " + compound.getName() + "! If compound provides KEGG identifier, it is set. If the compound provides BioCyc identifier, it is set. Else, empty id is returned.");
            return "";
        }
        return "";
    }

    /**
     * Converts a set of reactions and/or subreactions into the set of their identifiers.
     * <p>
     * For a {@link Subreaction} the KEGG id is added if present. For a {@link Reaction} the
     * KEGG id is added when {@code z} is true and the reaction has one; when {@code z} is
     * false the BioCyc id is added if present.
     *
     * @param set the reactions/subreactions to convert
     * @param z   {@code true} to collect KEGG ids, {@code false} to collect BioCyc ids
     *            (subreactions always contribute their KEGG id)
     * @return the collected identifiers
     */
    public static Set<String> reactionSetToStringSet(Set set, boolean z) {
        Set<String> identifiers = new HashSet<String>();
        // The current reaction/subreaction is kept across iterations, so an element of an
        // unrelated type re-evaluates the previously seen reaction.
        Reaction currentReaction = new Reaction();
        Subreaction currentSubreaction = new Subreaction();
        for (Iterator elements = set.iterator(); elements.hasNext();) {
            Object element = elements.next();
            boolean isSubreaction = !(element instanceof Reaction) && element instanceof Subreaction;
            if (isSubreaction) {
                currentSubreaction = (Subreaction) element;
            } else if (element instanceof Reaction || element instanceof Transformation) {
                // NOTE(review): a Transformation that is not a Reaction fails this cast
                // with a ClassCastException - confirm whether such elements can occur.
                currentReaction = (Reaction) element;
            }

            if (isSubreaction) {
                if (currentSubreaction.hasKeggId()) {
                    identifiers.add(currentSubreaction.getKeggId());
                }
            } else if (z) {
                if (currentReaction.hasKeggId()) {
                    identifiers.add(currentReaction.getKeggId());
                }
            } else if (currentReaction.hasBiocycUniqueId()) {
                identifiers.add(currentReaction.getBiocycUniqueId());
            }
        }
        return identifiers;
    }

    /**
     * Converts a set of compounds into the set of their identifiers.
     * <p>
     * Each element is cast to {@link Compound}. When {@code z} is true and the compound has a
     * KEGG id, that id is added; otherwise the BioCyc id is added if the compound has one.
     *
     * @param set the compounds to convert (every element must be a {@link Compound})
     * @param z   {@code true} to prefer KEGG ids
     * @return the collected identifiers
     */
    public static Set<String> compoundSetToStringSet(Set<Bioentity> set, boolean z) {
        Set<String> identifiers = new HashSet<String>();
        for (Bioentity entity : set) {
            Compound compound = (Compound) entity;
            boolean useKegg = z && compound.hasKeggId();
            if (useKegg) {
                identifiers.add(compound.getKeggId());
                continue;
            }
            if (compound.hasBiocycUniqueId()) {
                identifiers.add(compound.getBiocycUniqueId());
            }
        }
        return identifiers;
    }

    /**
     * Collects names and/or synonyms of the given organisms.
     *
     * @param set the organisms
     * @param z   {@code true} to include each organism's name
     * @param z2  {@code true} to include each organism's synonyms (split with
     *            {@link DataLoadingHelper#stringToSet})
     * @return the collected strings
     */
    public static Set<String> organismSetToStringSet(Set<Organism> set, boolean z, boolean z2) {
        Set<String> labels = new HashSet<String>();
        Iterator<Organism> organisms = set.iterator();
        while (organisms.hasNext()) {
            Organism organism = organisms.next();
            if (z) {
                labels.add(organism.getName());
            }
            if (z2) {
                labels.addAll(DataLoadingHelper.stringToSet(organism.getSynonyms()));
            }
        }
        return labels;
    }

    /**
     * Collects one kind of identifier from a set of polypeptides.
     * <p>
     * Each element is cast to {@link Polypeptide}. Depending on {@code str} the name, the
     * UniProt id or the synonyms (split with {@link DataLoadingHelper#stringToSet}) are
     * collected. For any other identifier type a message is written to {@code System.err}
     * for every element and nothing is added.
     *
     * @param set the polypeptides (every element must be a {@link Polypeptide})
     * @param str identifier type: {@code "name"},
     *            {@code MetabolicDatabaseConstants.POLYPEPTIDE_UNIPROT_ID} or {@code "synonyms"}
     * @return the collected strings
     */
    public static Set<String> polypeptideSetToStringSet(Set<Bioentity> set, String str) {
        Set<String> values = new HashSet<String>();
        for (Bioentity entity : set) {
            Polypeptide polypeptide = (Polypeptide) entity;
            if (str.equals("name")) {
                values.add(polypeptide.getName());
                continue;
            }
            if (str.equals(MetabolicDatabaseConstants.POLYPEPTIDE_UNIPROT_ID)) {
                values.add(polypeptide.getUniprotId());
                continue;
            }
            if (str.equals("synonyms")) {
                values.addAll(DataLoadingHelper.stringToSet(polypeptide.getSynonyms()));
                continue;
            }
            System.err.println("polypeptideSetToString: Given identifier type (" + str + ") unknown!");
        }
        return values;
    }

    /**
     * Collects names or synonyms of the given pathways.
     * <p>
     * Any identifier type other than {@code "name"} or {@code "synonyms"} yields an empty
     * result.
     *
     * @param set the pathways
     * @param str {@code "name"} to collect names, {@code "synonyms"} to collect synonyms
     *            (split with {@link DataLoadingHelper#stringToSet})
     * @return the collected strings
     */
    public static Set<String> pathwaySetToString(Set<Pathway> set, String str) {
        Set<String> values = new HashSet<String>();
        Iterator<Pathway> pathways = set.iterator();
        while (pathways.hasNext()) {
            Pathway pathway = pathways.next();
            if (str.equals("name")) {
                values.add(pathway.getName());
                continue;
            }
            if (str.equals("synonyms")) {
                values.addAll(DataLoadingHelper.stringToSet(pathway.getSynonyms()));
            }
        }
        return values;
    }

    /**
     * Collects the names of the given transformations.
     *
     * @param set the transformations
     * @return the set of their names
     */
    public static Set<String> pathwayStepSetToString(Set<Transformation> set) {
        Set<String> names = new HashSet<String>();
        for (Transformation step : set) {
            names.add(step.getName());
        }
        return names;
    }

    /**
     * Collects the EC number strings of the given EC number objects.
     *
     * @param set the EC number objects
     * @return the set of their EC number strings
     */
    public static Set<String> EcNumberSetToString(Set<Ecnumber> set) {
        Set<String> ecStrings = new HashSet<String>();
        for (Ecnumber ecnumber : set) {
            ecStrings.add(ecnumber.getEcNumber());
        }
        return ecStrings;
    }

    /**
     * Fetches standard name and description for each locus from the SGD locus pages.
     * <p>
     * For every locus the page
     * {@code http://www.yeastgenome.org/cgi-bin/locus.fpl?locus=<locus>} is converted to text.
     * The line following a line that starts with {@code Standard Name} is taken as the name;
     * the line following a line that starts with {@code Description} is taken as the
     * description, cut at the first {@code ;}. The description is split on
     * {@code AbstractFormatter.DEFAULT_COLUMN_SEPARATOR} and regrouped into chunks that are
     * closed as soon as they exceed 20 characters (each word is followed by the separator).
     * <p>
     * Parser state is reset for every locus so that a name or pending header from one page
     * cannot leak into the next one. Parser errors are printed with
     * {@code printStackTrace()} and the remaining loci are still processed.
     *
     * @param list locus identifiers
     * @return map from locus to a two-element {@code ArrayList}: the standard name
     *         ({@code String}) and the description chunks ({@code ArrayList<String>});
     *         loci without a description line have no entry
     */
    public static HashMap<String, Object> getGeneNamesAndDescriptionsFromSGD(List<String> list) {
        HashMap<String, Object> result = new HashMap<>();
        for (int i = 0; i < list.size(); i++) {
            String locus = list.get(i);
            // Per-locus state: must not survive into the next page.
            boolean nameOnNextLine = false;
            boolean descriptionOnNextLine = false;
            String standardName = "";
            try {
                String pageText = new StringExtractor("http://www.yeastgenome.org/cgi-bin/locus.fpl?locus=" + locus).extractStrings(false);
                for (String line : pageText.split("\n")) {
                    if (nameOnNextLine) {
                        standardName = line;
                        nameOnNextLine = false;
                    }
                    if (descriptionOnNextLine) {
                        String description = line;
                        // indexOf/substring instead of split(";")[0], which fails on ";".
                        int semicolon = description.indexOf(';');
                        if (semicolon >= 0) {
                            description = description.substring(0, semicolon);
                        }
                        ArrayList<String> chunks = new ArrayList<String>();
                        StringBuilder chunk = new StringBuilder();
                        for (String word : description.split(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR)) {
                            chunk.append(word).append(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR);
                            if (chunk.length() > 20) {
                                chunks.add(chunk.toString());
                                chunk.setLength(0);
                            }
                        }
                        if (chunk.length() > 0) {
                            chunks.add(chunk.toString());
                        }
                        ArrayList<Object> entry = new ArrayList<Object>();
                        entry.add(standardName);
                        entry.add(chunks);
                        result.put(locus, entry);
                        descriptionOnNextLine = false;
                    }
                    // Headers are checked last so the value is read from the following line.
                    if (line.startsWith("Standard Name")) {
                        nameOnNextLine = true;
                    }
                    if (line.startsWith("Description")) {
                        descriptionOnNextLine = true;
                    }
                }
            } catch (ParserException e) {
                e.printStackTrace();
            }
        }
        return result;
    }

    /**
     * Fetches gene names and descriptions for a list of gene identifiers from the RSAT
     * {@code gene_info} web service at
     * {@code http://rsat.ulb.ac.be/rsat/web_services/RSATWS.cgi}.
     * <p>
     * Result lines starting with {@code ;} are skipped. Every other line is split on tabs;
     * column 1 is used as the name and column 6 as the description. The description is cut at
     * the first {@code ;} and at the first {@code [}, then split on
     * {@code AbstractFormatter.DEFAULT_COLUMN_SEPARATOR} and regrouped into chunks that are
     * closed as soon as they exceed 20 characters (each word is followed by the separator).
     * Result rows are matched to the query identifiers by position.
     * <p>
     * Any exception ends processing; its {@code toString()} is written to
     * {@code System.err} and the entries collected so far are returned.
     *
     * @param list gene identifiers to query
     * @param str  organism name passed to the web service
     * @return map from query identifier to a two-element {@code ArrayList}: the name
     *         ({@code String}) and the description chunks ({@code ArrayList<String>})
     */
    public static HashMap<String, Object> getGeneNamesAndDescriptionsFromRSATInfoGene(List<String> list, String str) {
        HashMap<String, Object> result = new HashMap<>();
        String[] queries = list.toArray(new String[list.size()]);
        int row = 0;
        try {
            RSATWSPortType service = new RSATWebServicesLocator().getRSATWSPortType(new URL("http://rsat.ulb.ac.be/rsat/web_services/RSATWS.cgi"));
            GeneInfoRequest request = new GeneInfoRequest();
            request.setQuery(queries);
            request.setOrganism(str);
            request.setOutput("client");
            for (String line : service.gene_info(request).getClient().split("\n")) {
                if (line.startsWith(";")) {
                    continue;
                }
                String[] columns = line.split("\t");
                String name = columns[1];
                String description = columns[6];
                int semicolon = description.indexOf(';');
                if (semicolon >= 0) {
                    description = description.substring(0, semicolon);
                }
                // String.contains takes a literal, so the former contains("\\[") looked for a
                // backslash followed by '[' and never stripped the bracketed suffix.
                int bracket = description.indexOf('[');
                if (bracket >= 0) {
                    description = description.substring(0, bracket);
                }
                ArrayList<String> chunks = new ArrayList<String>();
                StringBuilder chunk = new StringBuilder();
                for (String word : description.split(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR)) {
                    chunk.append(word).append(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR);
                    if (chunk.length() > 20) {
                        chunks.add(chunk.toString());
                        chunk.setLength(0);
                    }
                }
                if (chunk.length() > 0) {
                    chunks.add(chunk.toString());
                }
                ArrayList<Object> entry = new ArrayList<Object>();
                entry.add(name);
                entry.add(chunks);
                result.put(list.get(row), entry);
                row++;
            }
        } catch (Exception e) {
            System.err.println(e.toString());
        }
        return result;
    }

    /**
     * Manual smoke test: queries the RSAT gene-info service for three yeast loci and prints
     * the resulting map to standard output.
     *
     * @param strArr command-line arguments (ignored)
     */
    public static void main(String[] strArr) {
        List<String> loci = new ArrayList<String>();
        loci.add("YIR029W");
        loci.add("YCL064C");
        loci.add("YLR089C");
        System.out.println(getGeneNamesAndDescriptionsFromRSATInfoGene(loci, "Saccharomyces_cerevisiae"));
    }
}
