package be.ac.ulb.bigre.pathwayinference.core.data;

import be.ac.ulb.bigre.pathwayinference.core.core.PathwayinferenceConstants;
import be.ac.ulb.bigre.pathwayinference.core.io.IOTools;
import be.ac.ulb.bigre.pathwayinference.core.util.MetabolicGraphFilter;
import be.ac.ulb.scmbb.snow.graph.core.Data;
import be.ac.ulb.scmbb.snow.graph.core.Graph;
import be.ac.ulb.scmbb.snow.graph.core.GraphDataLinker;
import be.ac.ulb.scmbb.snow.graph.core.Node;
import cern.colt.matrix.impl.AbstractFormatter;
import graphtools.util.GraphtoolsConstants;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Vector;
import java.util.logging.FileHandler;
import java.util.logging.Logger;
import java.util.logging.SimpleFormatter;
import org.apache.commons.cli.HelpFormatter;

/* loaded from: input_file:be/ac/ulb/bigre/pathwayinference/core/data/KeggLigandParser.class */
public class KeggLigandParser {
    // Path of the KEGG compound flat file (first constructor argument).
    private String _compFileLocation;
    // Path of the KEGG reaction flat file (second constructor argument).
    private String _rctFileLocation;
    // KEGG LIGAND version label; used in the graph/data identifiers and in the report header.
    private String _version;
    // Node attributes collected while the graph is built; attached to the linker at the end of buildReactionGraphDataLinker().
    private Data _reactionGraphData;
    // Graph plus attached data; replaced by the filtered linker at the end of parseKeggLigand().
    private GraphDataLinker _reactionGraphDataLinker;
    // Optional file handed to MetabolicGraphFilter; the empty string disables removal of given nodes.
    private String _fileWithNodesToRemove;
    // Report assembled by MetabolicGraphFilter after filtering.
    private String _filteringReport;
    // Line that terminates one record in the flat files.
    private static final String ENTRY_DELIMITER = "///";
    // Field keywords recognised at the start of a flat-file line.
    private static final String ENTRY = "ENTRY";
    private static final String NAME = "NAME";
    private static final String DEFINITION = "DEFINITION";
    private static final String EQUATION = "EQUATION";
    private static final String ENZYME = "ENZYME";
    private static final String FORMULA = "FORMULA";
    private static final String MASS = "MASS";
    private static final String DBLINKS = "DBLINKS";
    // Database names looked for in DBLINKS lines; also used as node attribute keys.
    private static final String PUBCHEM = "PubChem";
    private static final String CHEBI = "ChEBI";
    private static final String CAS = "CAS";
    // Markers stripped from a compound ENTRY line before the identifier is extracted.
    private static final String OBSOLETE = "Obsolete";
    private static final String PKNRP = "PKNRP";
    private static final String NRP = "NRP";
    private static final String PEPTIDE = "Peptide";
    private static final String PK = "PK";
    // Attribute key under which the EC number list of a reaction node is stored.
    private static String EC_NUMBER = "EC";
    private static Logger LOGGER = Logger.getLogger(KeggLigandParser.class.getName());
    // Separates the database name from the accession inside a DBLINKS line.
    private static String DBLINK_SEPARATOR = ": ";
    // Prefix of the log file name built in logToFile().
    public static String LOGFILE_PREFIX = "KeggLigandParser";
    // Placeholder stored when a name, definition or formula is missing.
    public static String UNKNOWN_ATTRIB_VALUE = "NA";
    // Day-month-year string set at the start of parseKeggLigand(); used in the log file name and report header.
    private String _dateString = "";
    // Counters reported in the graph header comment.
    private int _reactionInFileNum = 0;
    private int _compInFileNum = 0;
    private int _skippedReactionNum = 0;
    // Attribute key under which each node stores its KEGG identifier; also passed to MetabolicGraphFilter.
    public String exclusionAttribute = GraphtoolsConstants.DEFAULT_EXCLUSION_ATTRIBUTE;
    // Directory for the log file; the empty string means the bare file name is used.
    public String logFileLocation = "";
    // When true, parseKeggLigand() attaches a file handler to the logger.
    public boolean logToFile = false;
    // When true, the header report is printed to stdout and verbosity is forwarded to the filter.
    public boolean verbose = false;
    // Reaction id -> [name, definition, educts (Vector), products (Vector), EC numbers (Vector)].
    private HashMap<String, ArrayList<Object>> _rctIdVsAttribs = new HashMap<>();
    // Compound id -> [name, synonyms (Vector), formula, db links (Vector)].
    private HashMap<String, ArrayList<Object>> _compIdVsAttribs = new HashMap<>();

    public KeggLigandParser(String str, String str2, String str3) {
        this._compFileLocation = "";
        this._rctFileLocation = "";
        this._version = "";
        this._rctFileLocation = str2;
        this._compFileLocation = str;
        this._version = str3;
        this._reactionGraphDataLinker = GraphDataLinker.newGraphDataLinker(Graph.newGraph("KEGG_LIGAND_version" + str3));
        this._reactionGraphData = Data.newData("KEGG_LIGAND_version" + str3 + "_data");
        setFileWithNodesToRemove("");
        setFilteringReport("");
    }

    /**
     * Attaches a file handler with a {@link SimpleFormatter} to the class logger.
     * The file name is built from {@link #LOGFILE_PREFIX}, the joiner constant and
     * the current date string; it is placed under {@link #logFileLocation} when
     * that field is not empty. An I/O failure only prints the stack trace.
     */
    private void logToFile() {
        final String fileName = LOGFILE_PREFIX + PathwayinferenceConstants.REACTION_SUBREACTION_JOINER + this._dateString + ".log";
        final String target = this.logFileLocation.equals("")
                ? fileName
                : this.logFileLocation + PathwayinferenceConstants.PATH_SEPARATOR + fileName;
        try {
            FileHandler handler = new FileHandler(target);
            handler.setFormatter(new SimpleFormatter());
            LOGGER.addHandler(handler);
        } catch (IOException ioFailure) {
            ioFailure.printStackTrace();
        }
    }

    /**
     * Extracts the compounds on both sides of an EQUATION line and appends them to
     * the attribute list: first the educts (left of {@code " <=> "}), then the
     * products (right of it), each as a {@link Vector} of strings.
     *
     * <p>Unlike the previous implementation this never throws on a line without a
     * value or without the {@code " <=> "} separator (as happens for the first
     * line of a wrapped equation); the missing side yields an empty vector and a
     * warning is logged.
     *
     * @param str       the equation line, normally starting with the EQUATION keyword
     * @param arrayList attribute list of the current reaction; two vectors are appended
     */
    private void parseEductsAndProductsFromEquation(String str, ArrayList<Object> arrayList) {
        // Take the text after the keyword without relying on split(...)[1].
        int keywordPos = str.indexOf(EQUATION);
        String equation = (keywordPos < 0 ? str : str.substring(keywordPos + EQUATION.length())).trim();
        String[] sides = equation.split(" <=> ");
        String eductSide = sides.length > 0 ? sides[0] : "";
        String productSide = "";
        if (sides.length > 1) {
            productSide = sides[1];
        } else {
            LOGGER.warning("Equation \"" + equation + "\" has no \" <=> \" separator; no products could be parsed!");
        }
        arrayList.add(parseEquationSide(eductSide));
        arrayList.add(parseEquationSide(productSide));
    }

    /**
     * Splits one side of an equation on {@code " + "} and strips a leading
     * coefficient (the part before the column separator) from every term.
     * Terms are trimmed first; empty terms are skipped.
     *
     * @param side text of one equation side
     * @return the compound identifiers in order of appearance
     */
    private static Vector<String> parseEquationSide(String side) {
        Vector<String> compounds = new Vector<>();
        for (String rawTerm : side.split(" \\+ ")) {
            String term = rawTerm.trim();
            if (term.isEmpty()) {
                continue;
            }
            if (term.contains(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR)) {
                // pieces[0] is the coefficient; the compound is the next non-empty piece.
                String[] pieces = term.split(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR);
                for (int i = 1; i < pieces.length; i++) {
                    if (!pieces[i].isEmpty()) {
                        term = pieces[i];
                        break;
                    }
                }
            }
            compounds.add(term);
        }
        return compounds;
    }

    /**
     * Reads the reaction flat file line by line and fills {@code _rctIdVsAttribs}
     * with one attribute list per reaction:
     * {@code [name, definition, educts, products, EC numbers]}.
     *
     * <p>Attributes are appended in the order the fields occur in the file. When a
     * record ends ({@code "///"}) with fewer or more than five attributes, missing
     * ones are filled with {@link #UNKNOWN_ATTRIB_VALUE} or empty vectors and a
     * warning is logged.
     *
     * <p>A line containing the KEGG compound marker that follows the EQUATION line
     * (before COMMENT/RPAIR/ENZYME/PATHWAY/ORTHOLOGY) is treated as a continuation
     * of the equation: the previously parsed educts/products are dropped and the
     * joined text is parsed again. The joined text is remembered, so equations
     * wrapped over more than two lines are restored completely.
     *
     * <p>The reader is always closed; I/O errors are logged and end the parse.
     */
    private void parseReactionFile() {
        String previousLine = "";
        String reactionId = "";
        ArrayList<Object> attributes = new ArrayList<>();
        boolean equationSeen = false;
        boolean commentSeen = false;
        boolean nameSeen = false;
        boolean enzymeSeen = false;
        boolean definitionSeen = false;
        boolean rpairSeen = false;
        boolean pathwaySeen = false;
        boolean orthologySeen = false;
        int lineNumber = 0;
        try (BufferedReader reader = new BufferedReader(new FileReader(this._rctFileLocation))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                // Text remembered as "previous line" for the next iteration.
                String carry = line;
                if (line.startsWith(ENTRY)) {
                    this._reactionInFileNum++;
                    equationSeen = false;
                    commentSeen = false;
                    rpairSeen = false;
                    enzymeSeen = false;
                    nameSeen = false;
                    definitionSeen = false;
                    pathwaySeen = false;
                    orthologySeen = false;
                    attributes = new ArrayList<>();
                    // The identifier sits between the keyword and the "Reaction" type tag.
                    String rest = line.substring(ENTRY.length());
                    int typePos = rest.indexOf("Reaction");
                    reactionId = (typePos < 0 ? rest : rest.substring(0, typePos)).trim();
                }
                if (line.startsWith(NAME)) {
                    attributes.add(line.substring(NAME.length()).trim());
                    nameSeen = true;
                }
                if (line.startsWith(DEFINITION)) {
                    attributes.add(line.substring(DEFINITION.length()).trim());
                    definitionSeen = true;
                }
                if (line.startsWith("COMMENT")) {
                    commentSeen = true;
                }
                if (line.startsWith("RPAIR")) {
                    rpairSeen = true;
                }
                if (line.startsWith("PATHWAY")) {
                    pathwaySeen = true;
                }
                if (line.startsWith("ORTHOLOGY")) {
                    orthologySeen = true;
                }
                if (line.startsWith(ENZYME)) {
                    Vector<String> ecNumbers = new Vector<>();
                    String value = line.substring(ENZYME.length()).trim();
                    if (value.contains(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR)) {
                        for (String piece : value.split(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR)) {
                            String ecNumber = piece.trim();
                            if (!ecNumber.equals("")) {
                                ecNumbers.add(ecNumber);
                            }
                        }
                    } else if (!value.isEmpty()) {
                        ecNumbers.add(value);
                    }
                    attributes.add(ecNumbers);
                    enzymeSeen = true;
                }
                if (line.contains(PathwayinferenceConstants.KEGG_COMPOUND) && equationSeen && !commentSeen && !rpairSeen && !enzymeSeen && !pathwaySeen && !orthologySeen) {
                    String restoredLine = previousLine + AbstractFormatter.DEFAULT_COLUMN_SEPARATOR + line.trim();
                    LOGGER.warning("Equation of reaction " + reactionId + " in line " + lineNumber + " takes two lines!");
                    LOGGER.info("Restored line: " + restoredLine);
                    // Drop the educts/products parsed from the incomplete equation.
                    attributes.remove(attributes.size() - 1);
                    attributes.remove(attributes.size() - 1);
                    parseEductsAndProductsFromEquation(restoredLine, attributes);
                    // Keep the joined text so a further continuation line extends it.
                    carry = restoredLine;
                }
                if (line.startsWith(EQUATION)) {
                    parseEductsAndProductsFromEquation(line, attributes);
                    equationSeen = true;
                }
                if (line.startsWith("///")) {
                    if (attributes.size() != 5) {
                        String problem = "Missing entry for reaction " + reactionId + "! ";
                        if (!nameSeen) {
                            attributes.add(0, UNKNOWN_ATTRIB_VALUE);
                            problem = problem + "Reaction name is missing! ";
                        }
                        if (!definitionSeen) {
                            // Slot 1 holds the DEFINITION text, which is exported as the "Equation" attribute.
                            attributes.add(1, UNKNOWN_ATTRIB_VALUE);
                            problem = problem + "Equation is missing! ";
                        }
                        if (!equationSeen) {
                            attributes.add(2, new Vector());
                            attributes.add(3, new Vector());
                            problem = problem + "Educts and products are missing! ";
                        }
                        if (!enzymeSeen) {
                            attributes.add(new Vector());
                            problem = problem + "EC number is missing!";
                        }
                        LOGGER.warning(problem);
                    }
                    this._rctIdVsAttribs.put(reactionId, attributes);
                }
                previousLine = carry.replace("\n", "");
            }
        } catch (IOException e) {
            // Covers FileNotFoundException as well; keep going with whatever was parsed.
            LOGGER.log(java.util.logging.Level.SEVERE, "Could not read reaction file " + this._rctFileLocation, e);
        }
    }

    /**
     * Reads the compound flat file line by line and fills {@code _compIdVsAttribs}
     * with one attribute list per non-obsolete compound:
     * {@code [name, synonyms, formula, db links]}.
     *
     * <p>Lines following NAME are collected as synonyms and lines following
     * DBLINKS as database links. Collection stops at the next line that starts
     * with a non-whitespace character, i.e. at the next field keyword or at the
     * {@code "///"} record terminator, so later fields are no longer swallowed
     * when FORMULA/MASS/REACTION or ATOM/BOND are absent from a record.
     *
     * <p>The reader is always closed; I/O errors are logged and end the parse.
     */
    private void parseCompoundFile() {
        ArrayList<Object> attributes = new ArrayList<>();
        String compoundId = "";
        String name = "";
        String formula = "";
        Vector<String> synonyms = new Vector<>();
        Vector<String> dbLinks = new Vector<>();
        boolean inSynonyms = false;
        boolean inDbLinks = false;
        boolean obsolete = false;
        try (BufferedReader reader = new BufferedReader(new FileReader(this._compFileLocation))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // A character in column 0 starts a new field (or "///") and ends any continuation block.
                if (!line.isEmpty() && !Character.isWhitespace(line.charAt(0))) {
                    inSynonyms = false;
                    inDbLinks = false;
                }
                if (line.startsWith(ENTRY)) {
                    this._compInFileNum++;
                    obsolete = false;
                    name = "";
                    formula = "";
                    synonyms = new Vector<>();
                    dbLinks = new Vector<>();
                    attributes = new ArrayList<>();
                    // Strip status/type markers so only the identifier is left before the type tag.
                    String entryLine = line;
                    if (entryLine.contains(OBSOLETE)) {
                        entryLine = entryLine.replace(OBSOLETE, "");
                        obsolete = true;
                    }
                    if (entryLine.contains(PK)) {
                        entryLine = entryLine.replace(PK, "");
                    }
                    if (entryLine.contains(NRP)) {
                        entryLine = entryLine.replace(NRP, "");
                    }
                    if (entryLine.contains(PKNRP)) {
                        entryLine = entryLine.replace(PKNRP, "");
                    }
                    if (entryLine.contains(PEPTIDE)) {
                        entryLine = entryLine.replace(PEPTIDE, "");
                    }
                    String[] parts = entryLine.substring(ENTRY.length()).split(PathwayinferenceConstants.COMPOUND);
                    compoundId = (parts.length > 0 ? parts[0] : "").trim();
                }
                if (line.startsWith(FORMULA)) {
                    formula = line.substring(FORMULA.length()).trim();
                }
                if (inSynonyms) {
                    String synonym = line.trim();
                    if (synonym.endsWith(";")) {
                        synonym = synonym.substring(0, synonym.length() - 1);
                    }
                    synonyms.add(synonym);
                }
                if (line.startsWith(NAME)) {
                    name = line.substring(NAME.length()).trim();
                    if (name.endsWith(";")) {
                        name = name.substring(0, name.length() - 1);
                    }
                    inSynonyms = true;
                }
                if (inDbLinks) {
                    dbLinks.add(line.trim());
                }
                if (line.startsWith(DBLINKS)) {
                    inDbLinks = true;
                    dbLinks.add(line.substring(DBLINKS.length()).trim());
                }
                if (line.startsWith("///")) {
                    if (obsolete) {
                        LOGGER.info("Skipping obsolete compound " + compoundId + "!");
                    } else {
                        if (name.equals("")) {
                            LOGGER.warning("Missing name for compound " + compoundId + "!");
                            attributes.add(UNKNOWN_ATTRIB_VALUE);
                        } else {
                            attributes.add(name);
                        }
                        attributes.add(synonyms);
                        if (synonyms.isEmpty()) {
                            LOGGER.warning("Missing synonyms for compound " + compoundId + "!");
                        }
                        if (formula.equals("")) {
                            LOGGER.warning("Missing formula for compound " + compoundId + "!");
                            attributes.add(UNKNOWN_ATTRIB_VALUE);
                        } else {
                            attributes.add(formula);
                        }
                        attributes.add(dbLinks);
                        if (dbLinks.isEmpty()) {
                            LOGGER.warning("Missing db links for compound " + compoundId + "!");
                        }
                        this._compIdVsAttribs.put(compoundId, attributes);
                    }
                }
            }
        } catch (IOException e) {
            // Covers FileNotFoundException as well; keep going with whatever was parsed.
            LOGGER.log(java.util.logging.Level.SEVERE, "Could not read compound file " + this._compFileLocation, e);
        }
    }

    /**
     * Adds a compound node with the given identifier to the graph unless it is
     * already present, and stores its attributes in the graph data.
     *
     * <p>Every node gets its object type and the exclusion attribute. When the
     * compound was found in the compound file, label, synonyms and formula are
     * taken from there and PubChem/ChEBI/CAS accessions are copied from the db
     * links; otherwise the identifier itself is used as label. A db link line that
     * lacks the {@code ": "} separator is skipped with a warning instead of
     * aborting the graph construction.
     *
     * @param str KEGG compound identifier
     */
    private void addCompoundNode(String str) {
        if (getReactionGraphDataLinker().getGraph().hasNode(str)) {
            return;
        }
        ArrayList<Object> attributes = this._compIdVsAttribs.get(str);
        if (attributes == null) {
            LOGGER.warning("For compound " + str + " no attributes could be parsed from compound file!");
        }
        getReactionGraphDataLinker().getGraph().addNode(str);
        this._reactionGraphData.put(str, "ObjectType", PathwayinferenceConstants.COMPOUND);
        this._reactionGraphData.put(str, this.exclusionAttribute, str);
        if (attributes == null || attributes.isEmpty()) {
            this._reactionGraphData.put(str, "Label", str);
            return;
        }
        this._reactionGraphData.put(str, "Label", attributes.get(0));
        this._reactionGraphData.put(str, PathwayinferenceConstants.SYNONYMES, attributes.get(1));
        this._reactionGraphData.put(str, PathwayinferenceConstants.FORMULA, attributes.get(2));
        Vector<?> dbLinks = (Vector<?>) attributes.get(3);
        // Databases whose accessions are exported; the name doubles as attribute key.
        String[] exportedDatabases = {PUBCHEM, CHEBI, CAS};
        for (Object entry : dbLinks) {
            String link = (String) entry;
            for (String database : exportedDatabases) {
                if (!link.contains(database)) {
                    continue;
                }
                String[] parts = link.split(DBLINK_SEPARATOR);
                if (parts.length > 1) {
                    this._reactionGraphData.put(str, database, parts[1]);
                } else {
                    LOGGER.warning("Malformed " + database + " db link \"" + link + "\" for compound " + str + "!");
                }
            }
        }
    }

    /**
     * Adds the direct and the reverse node of a reaction to the graph, unless one
     * of them already exists. Both nodes receive the same attributes: object type,
     * the exclusion attribute (the plain reaction id), label, equation and EC
     * numbers taken from the parsed reaction attributes.
     *
     * @param str KEGG reaction identifier (a key of the parsed reaction map)
     */
    private void addReactionNode(String str) {
        ArrayList<Object> attributes = this._rctIdVsAttribs.get(str);
        Graph graph = getReactionGraphDataLinker().getGraph();
        String directId = String.valueOf(str) + PathwayinferenceConstants.DIRECT_REACTION;
        String reverseId = String.valueOf(str) + PathwayinferenceConstants.REVERSE_REACTION;
        if (graph.hasNode(directId) || graph.hasNode(reverseId)) {
            return;
        }
        // Direct node first, then reverse node, each with identical attributes.
        for (String nodeId : new String[] {directId, reverseId}) {
            graph.addNode(nodeId);
            this._reactionGraphData.put(nodeId, "ObjectType", "Reaction");
            this._reactionGraphData.put(nodeId, this.exclusionAttribute, str);
            this._reactionGraphData.put(nodeId, "Label", attributes.get(0));
            this._reactionGraphData.put(nodeId, "Equation", attributes.get(1));
            this._reactionGraphData.put(nodeId, EC_NUMBER, attributes.get(4));
        }
    }

    /**
     * Builds the bipartite reaction/compound graph from the parsed reactions.
     *
     * <p>Reactions whose id does not match the reaction pattern, or that involve a
     * compound not matching the compound pattern, are skipped and counted. For
     * every remaining reaction a direct and a reverse node are created; educts
     * point to the direct node and are pointed to by the reverse node, products
     * the other way round. Finally the collected node data is attached to the
     * graph data linker.
     */
    private void buildReactionGraphDataLinker() {
        Graph graph = getReactionGraphDataLinker().getGraph();
        for (String reactionId : this._rctIdVsAttribs.keySet()) {
            if (!reactionId.matches("^R[\\d]{5}")) {
                LOGGER.info("Skipping reaction " + reactionId + " because it is not match the KEGG reaction regexp!");
                this._skippedReactionNum++;
                continue;
            }
            ArrayList<Object> attributes = this._rctIdVsAttribs.get(reactionId);
            Vector educts = (Vector) attributes.get(2);
            Vector products = (Vector) attributes.get(3);

            // Validate all participants first; every offender is reported.
            boolean invalidCompound = false;
            for (Object item : educts) {
                String educt = (String) item;
                if (!educt.matches("^C[\\d]{5}")) {
                    LOGGER.warning("Educt " + educt + " of reaction " + reactionId + " does not match the KEGG compound regexp!");
                    if (!educt.startsWith("G")) {
                        LOGGER.warning("Offending compound is not a glycan!");
                    }
                    invalidCompound = true;
                }
            }
            for (Object item : products) {
                String product = (String) item;
                if (!product.matches("^C[\\d]{5}")) {
                    LOGGER.warning("Product " + product + " of reaction " + reactionId + " does not match the KEGG compound regexp!");
                    if (!product.startsWith("G")) {
                        LOGGER.warning("Offending compound is not a glycan!");
                    }
                    invalidCompound = true;
                }
            }
            if (invalidCompound) {
                LOGGER.info("Skipping reaction " + reactionId + " because it involves KEGG compounds not matching the KEGG compound regexp!");
                this._skippedReactionNum++;
                continue;
            }

            addReactionNode(reactionId);
            Node direct = graph.getNode(String.valueOf(reactionId) + PathwayinferenceConstants.DIRECT_REACTION);
            Node reverse = graph.getNode(String.valueOf(reactionId) + PathwayinferenceConstants.REVERSE_REACTION);

            // Educts: compound -> direct reaction, reverse reaction -> compound.
            for (Object item : educts) {
                String eductId = (String) item;
                addCompoundNode(eductId);
                Node educt = graph.getNode(eductId);
                String intoDirect = String.valueOf(educt.getIdentifier()) + "->" + direct.getIdentifier();
                if (!graph.hasArc(intoDirect)) {
                    graph.addArc(intoDirect, educt, direct);
                }
                String outOfReverse = String.valueOf(reverse.getIdentifier()) + "->" + educt.getIdentifier();
                if (!graph.hasArc(outOfReverse)) {
                    graph.addArc(outOfReverse, reverse, educt);
                }
            }
            // Products: direct reaction -> compound, compound -> reverse reaction.
            for (Object item : products) {
                String productId = (String) item;
                addCompoundNode(productId);
                Node product = graph.getNode(productId);
                String outOfDirect = String.valueOf(direct.getIdentifier()) + "->" + product.getIdentifier();
                if (!graph.hasArc(outOfDirect)) {
                    graph.addArc(outOfDirect, direct, product);
                }
                String intoReverse = String.valueOf(product.getIdentifier()) + "->" + reverse.getIdentifier();
                if (!graph.hasArc(intoReverse)) {
                    graph.addArc(intoReverse, product, reverse);
                }
            }
        }
        getReactionGraphDataLinker().addData(this._reactionGraphData);
    }

    /**
     * Runs the complete pipeline: parses the reaction and compound files, builds
     * the graph, stores a header comment describing the construction on the graph,
     * filters the graph with {@link MetabolicGraphFilter} and replaces the graph
     * data linker with the filtered one. Node and arc counts of the unfiltered
     * graph are printed to stdout; the header is printed too when
     * {@link #verbose} is set.
     */
    public void parseKeggLigand() {
        Calendar now = Calendar.getInstance();
        now.setTime(new Date());
        // day-month-year, month converted from Calendar's zero-based value
        this._dateString = String.valueOf(now.get(Calendar.DAY_OF_MONTH)) + HelpFormatter.DEFAULT_OPT_PREFIX + (now.get(Calendar.MONTH) + 1) + HelpFormatter.DEFAULT_OPT_PREFIX + now.get(Calendar.YEAR);
        if (this.logToFile) {
            logToFile();
        }

        LOGGER.info("Indexing reaction file...");
        parseReactionFile();
        LOGGER.info("Indexing compound file...");
        parseCompoundFile();
        LOGGER.info("Parsing done. Constructing KEGG LIGAND graph...");
        buildReactionGraphDataLinker();
        System.out.println("Node number: " + getReactionGraphDataLinker().getGraph().getNumNodes());
        System.out.println("Arc number: " + getReactionGraphDataLinker().getGraph().getNumArcs());

        StringBuilder header = new StringBuilder();
        header.append("# Date of graph construction: ").append(this._dateString).append("\n");
        header.append("# Graph constructed from compound file ").append(IOTools.getFileWithoutDir(this._compFileLocation));
        header.append(" and reaction file ").append(IOTools.getFileWithoutDir(this._rctFileLocation));
        header.append(" of KEGG LIGAND version ").append(this._version).append("\n");
        header.append("# Compound file contained ").append(this._compInFileNum).append(" entries.\n");
        header.append("# Reaction file contained ").append(this._reactionInFileNum).append(" entries.\n");
        header.append("# ").append(this._skippedReactionNum).append(" reactions were skipped during parsing because they involved glycans or compounds with dubious stoichiometry (n or m in brackets).\n");
        String report = header.toString();
        getReactionGraphDataLinker().getDatas().get(0).put(getReactionGraphDataLinker().getGraph().getIdentifier(), "#", report);

        LOGGER.info("Filtering KEGG LIGAND graph to remove orphans and dubious reactions...");
        MetabolicGraphFilter filter = new MetabolicGraphFilter(getReactionGraphDataLinker(), this.exclusionAttribute);
        String nodesToRemove = getFileWithNodesToRemove();
        if (!nodesToRemove.equals("")) {
            filter.setFileWithNodesToRemove(getFileWithNodesToRemove());
            filter.removeGivenNodes = true;
        }
        filter.filterDubiousReactions = true;
        filter.removeOrphans = true;
        filter.standardGraph = true;
        filter.verbose = this.verbose;
        filter.filter();
        setFilteringReport(filter.assembleReport());
        setReactionGraphDataLinker(filter.getMetabolicGraphDataLinker());
        LOGGER.info("Done");
        if (this.verbose) {
            System.out.println(report);
        }
    }

    /**
     * Replaces the graph data linker held by this parser.
     *
     * @param graphDataLinker the linker to use from now on
     */
    public void setReactionGraphDataLinker(GraphDataLinker graphDataLinker) {
        _reactionGraphDataLinker = graphDataLinker;
    }

    /**
     * @return the graph data linker currently held by this parser
     */
    public GraphDataLinker getReactionGraphDataLinker() {
        return _reactionGraphDataLinker;
    }

    /**
     * Sets the file passed to the graph filter for node removal.
     *
     * @param str file location; the empty string disables removal of given nodes
     */
    public void setFileWithNodesToRemove(String str) {
        _fileWithNodesToRemove = str;
    }

    /**
     * @return the file location set for node removal (empty string when unset)
     */
    public String getFileWithNodesToRemove() {
        return _fileWithNodesToRemove;
    }

    /**
     * Stores the report produced by the graph filter.
     *
     * @param str report text
     */
    public void setFilteringReport(String str) {
        _filteringReport = str;
    }

    /**
     * @return the stored filtering report (empty string before filtering)
     */
    public String getFilteringReport() {
        return _filteringReport;
    }

    /**
     * Command-line entry point: parses KEGG LIGAND verbosely.
     *
     * <p>Expects three arguments: compound file, reaction file and KEGG LIGAND
     * version. When fewer than three arguments are given, the historical
     * built-in defaults are used so existing invocations keep working.
     *
     * @param strArr {@code [compoundFile, reactionFile, version]} or empty
     */
    public static void main(String[] strArr) {
        String compoundFile = "/Users/karoline/Documents/dev_workspace/rsa-tools/data/KEGG/KEGG_LIGAND/compound_19-1-2009.txt";
        String reactionFile = "/Users/karoline/Documents/dev_workspace/rsa-tools/data/KEGG/KEGG_LIGAND/reaction_19-1-2009.txt";
        String version = "49.0";
        if (strArr != null && strArr.length >= 3) {
            compoundFile = strArr[0];
            reactionFile = strArr[1];
            version = strArr[2];
        }
        KeggLigandParser keggLigandParser = new KeggLigandParser(compoundFile, reactionFile, version);
        keggLigandParser.verbose = true;
        keggLigandParser.parseKeggLigand();
    }
}
