/*
 * Decompiled with CFR 0.152.
 */
package edu.msu.cme.rdp.classifier.train.validation.crossvalidate;

import edu.msu.cme.rdp.classifier.train.validation.crossvalidate.CrossValidate;
import java.io.File;
import java.io.IOException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;

/**
 * Command-line entry point that parses the cross-validation options and
 * hands them to {@link CrossValidate#runTest}.
 *
 * <p>Required options: {@code -t} (taxonomy file), {@code -s} (training
 * fasta file) and {@code -o} (output file). Optional: {@code -l},
 * {@code -f/--fraction}, {@code -r/--rdmRank} and {@code -w}.
 */
public class CrossValidateMain {
    /** Smallest value accepted for {@code -w}; also used as the default passed to runTest. */
    private static final int MIN_BOOTSTRAP_WORDS = 5;

    /** Option definitions, populated once by the static initializer below. */
    private static final Options options = new Options();

    /**
     * Parses {@code args} and runs the cross-validation test.
     *
     * <p>If a required option is missing, or a numeric option cannot be
     * parsed, the reason is written to standard error, the usage text is
     * printed, and the method returns without running the test.
     *
     * @param args command-line arguments
     * @throws IOException propagated from {@code CrossValidate.runTest}
     * @throws IllegalArgumentException if {@code -w} is given a value below 5
     */
    public static void main(String[] args) throws IOException {
        String taxFile;
        String sourceFile;
        String outFile;
        Integer partialLength = null;
        float fraction = 0.1f;
        String rdmSelectedRank = null;
        int minBootstrapWords = MIN_BOOTSTRAP_WORDS;
        try {
            CommandLine line = new PosixParser().parse(options, args);
            taxFile = requireOption(line, "t", "Taxonomy file must be specified");
            sourceFile = requireOption(line, "s", "Source training fasta file must be specified");
            outFile = requireOption(line, "o", "Output file must be specified");
            if (line.hasOption("l")) {
                partialLength = Integer.valueOf(parseIntOption(line, "l"));
            }
            if (line.hasOption("fraction")) {
                fraction = parseFloatOption(line, "fraction");
            }
            if (line.hasOption("rdmRank")) {
                rdmSelectedRank = line.getOptionValue("rdmRank");
            }
            if (line.hasOption("w")) {
                minBootstrapWords = parseIntOption(line, "w");
                if (minBootstrapWords < MIN_BOOTSTRAP_WORDS) {
                    // Kept as IllegalArgumentException (not routed to the usage path)
                    // so existing callers of main() see the same exception type.
                    throw new IllegalArgumentException("minWords (-w) must be at least "
                            + MIN_BOOTSTRAP_WORDS + ", but was " + minBootstrapWords);
                }
            }
        }
        catch (ParseException ex) {
            // Tell the user what was wrong before showing the usage text.
            System.err.println("Error: " + ex.getMessage());
            new HelpFormatter().printHelp(120, "CrossValidateMain", "", options, "", true);
            return;
        }
        // NOTE(review): the seed flag is always true here; its exact effect is defined in CrossValidate.
        boolean useSeed = true;
        CrossValidate theObj = new CrossValidate();
        theObj.runTest(new File(taxFile), new File(sourceFile), new File(outFile), rdmSelectedRank, fraction, partialLength, useSeed, minBootstrapWords);
    }

    /** Returns the value of a mandatory option, or throws ParseException with {@code message} if absent. */
    private static String requireOption(CommandLine line, String opt, String message) throws ParseException {
        if (!line.hasOption(opt)) {
            throw new ParseException(message);
        }
        return line.getOptionValue(opt);
    }

    /** Reads option {@code opt} as an int, reporting a malformed value as a ParseException. */
    private static int parseIntOption(CommandLine line, String opt) throws ParseException {
        String raw = line.getOptionValue(opt);
        try {
            return Integer.parseInt(raw);
        }
        catch (NumberFormatException ex) {
            throw new ParseException("Option '" + opt + "' expects an integer but got '" + raw + "'");
        }
    }

    /** Reads option {@code opt} as a float, reporting a malformed value as a ParseException. */
    private static float parseFloatOption(CommandLine line, String opt) throws ParseException {
        String raw = line.getOptionValue(opt);
        try {
            return Float.parseFloat(raw);
        }
        catch (NumberFormatException ex) {
            throw new ParseException("Option '" + opt + "' expects a number but got '" + raw + "'");
        }
    }

    // Registers every supported option; runs after the 'options' field initializer above.
    static {
        options.addOption(new Option("s", "trainSeqFile", true, "training files in fasta format labelled with the lineage information. \nThe header of this fasta file starts with '>', followed by the sequence name, white space(s) and a list taxon names seperated by ';' with highest rank taxon first\nex: Root;Bacteria;Proteobacteria;Gammaproteobacteria;Enterobacteriales;Enterobacteriaceae;Enterobacter"));
        options.addOption(new Option("t", "trainTaxonFile", true, "contains the hierarchical taxonomy information, taxon name and rank together is unique. \nThe format looks like the following: taxid*taxon name*parent taxid*depth*rank Note taxid, the parent taxid and depth should be in integer format. depth indicates the depth from the root taxon."));
        options.addOption(new Option("o", "outputFile", true, "stat of leave-one-out testing including correctness rate at each rank, misclassified rate for each taxon "));
        options.addOption(new Option("l", "length", true, "the default is to test the entire query sequence. if specifiy a length, a region of the query sequence with the specified length will be random choosen for testing"));
        options.addOption(new Option("w", "minWords", true, "minimum number of words for each bootstrap trial. Default(maximum) is 1/8 of the words of each sequence. Minimum is 5"));
        options.addOption("f", "fraction", true, "fraction of the complete set as test set, default is 0.1");
        options.addOption("r", "rdmRank", true, "if specified, random select a fraction of taxa at the given rank, and use all the sequence assigned to the selected taxa as test set. If rank is not specified, a fraction of sequences will be selected from the source file to use as test set");
    }
}

