suppressPackageStartupMessages(library(optparse))
suppressPackageStartupMessages(library(futile.logger))

### Parsing command line ------------------------------------------------------

# Command line interface. Several defaults are read from the formals of the
# PureCN functions so that the script stays in sync with the package.
# NOTE(review): the `[[2]]` / `[[3]]` indexing presumably picks arguments out
# of a default that is an unevaluated call (e.g. c(...) or seq(...)) - confirm
# against the PureCN function signatures.
option_list <- list(
    ## Inputs
    make_option(c("-i", "--sampleid"), action="store", type="character", default=NULL,
        help="Sample id"),
    make_option(c("--normal"), action="store", type="character", default=NULL,
        help="Input: normal coverage, GC-normalized. Optional if normaldb or segfile is provided."),
    make_option(c("--tumor"), action="store", type="character", default=NULL,
        help="Input: tumor coverage, GC-normalized"),
    make_option(c("--vcf"), action="store", type="character", default=NULL,
        help="Input: VCF file"),
    make_option(c("--rds"), action="store", type="character", default=NULL,
        help="Input: PureCN output RDS file, used to regenerate plots and files after manual curation"),
    make_option(c("--normal_panel"), action="store", type="character", default=NULL,
        help="Input: VCF containing calls from a panel of normals, for example generated by GATK CombineVariants."),
    make_option(c("--normaldb"), action="store", type="character", default=NULL,
        help="Input: NormalDB.rds file. Generated by NormalDB.R."),
    make_option(c("--segfile"), action="store", type="character", default=NULL,
        help="Input: Segmentation file"),
    make_option(c("--sex"), action="store", type="character",
        default=formals(PureCN::runAbsoluteCN)$sex[[2]],
        help="Input: Sex of sample. ? (detect), diploid (non-diploid chromosomes removed), F or M [default %default]"),
    make_option(c("--pool"), action="store", type="integer", default=10,
        help="Pool n best normals for log-ratio calculation [default %default]"),
    ## Assay
    make_option(c("--genome"), action="store", type="character", default=NULL,
        help="Assay: Genome version [default %default]"),
    make_option(c("--gcgene"), action="store", type="character", default=NULL,
        help="Assay: Interval file as generated by IntervalFile.R"),
    ## VCF filtering
    make_option(c("--statsfile"), action="store", type="character", default=NULL,
        help="VCF Filter: MuTect stats file, used to filter artifacts"),
    make_option(c("--minaf"), action="store", type="double", default=0.03,
        help="VCF Filter: minimum allelic fraction [default %default]"),
    make_option(c("--snpblacklist"), action="store", type="character", default=NULL,
        help="VCF Filter: File parsable by rtracklayer that defines blacklisted regions"),
    make_option(c("--error"), action="store", type="double",
        default=formals(PureCN::runAbsoluteCN)$error,
        help="VCF Filter: Estimated sequencing error rate for artifact filtering [default %default]"),
    ## Segmentation
    make_option(c("--funsegmentation"), action="store", type="character", default="CBS",
        help="Segmentation: Algorithm. CBS, PSCBS or none [default %default]"),
    make_option(c("--alpha"), action="store", type="double",
        default=formals(PureCN::segmentationCBS)$alpha,
        help="Segmentation: significance of breakpoints [default %default]"),
    make_option(c("--maxsegments"), action="store", type="double",
        default=formals(PureCN::runAbsoluteCN)$max.segments,
        help="Segmentation: Flag noisy samples with many segments [default %default]"),
    make_option(c("--targetweightfile"), action="store", type="character", default=NULL,
        help="Segmentation: Weights of targets. Generated by NormalDB.R."),
    ## Purity/ploidy search space and model fitting
    make_option(c("--minpurity"), action="store", type="double",
        default=formals(PureCN::runAbsoluteCN)$test.purity[[2]],
        help="Minimum considered purity [default %default]"),
    make_option(c("--maxpurity"), action="store", type="double",
        default=formals(PureCN::runAbsoluteCN)$test.purity[[3]],
        help="Maximum considered purity [default %default]"),
    make_option(c("--minploidy"), action="store", type="double", default=1,
        help="Minimum considered ploidy [default %default]"),
    make_option(c("--maxploidy"), action="store", type="double", default=6,
        help="Maximum considered ploidy [default %default]"),
    make_option(c("--postoptimize"), action="store_true", default=FALSE,
        help="Post-optimization [default %default]"),
    make_option(c("--modelhomozygous"), action="store_true", default=FALSE,
        help="Model homozygous variants in very pure samples [default %default]"),
    make_option(c("--model"), action="store", type="character",
        default=formals(PureCN::runAbsoluteCN)$model[[2]],
        help="Model used to fit variants. Either beta or betabin [default %default]."),
    make_option(c("--logratiocalibration"), action="store", type="double",
        default=formals(PureCN::runAbsoluteCN)$log.ratio.calibration,
        help="Parameter defining the extent to which log-ratios might be miscalibrated [default %default]."),
    ## Output and miscellaneous
    make_option(c("--outvcf"), action="store_true", default=FALSE,
        help="Output: Annotate input VCF with posterior probabilities. Otherwise produce CSV file."),
    make_option(c("--out"), action="store", type="character", default=NULL,
        help=paste("Output: File name prefix to which results should be written.",
        "If out is a directory, will use out/sampleid.")),
    make_option(c("--seed"), action="store", type="integer", default=NULL,
        help="Seed for random number generator [default %default]"),
    make_option(c("-v", "--version"), action="store_true", default=FALSE,
        help="Print PureCN version"),
    make_option(c("-f", "--force"), action="store_true", default=FALSE,
        help="Overwrite existing files")
)

# Parse the arguments given on the command line into a named list
opt <- parse_args(OptionParser(option_list=option_list))

# --version: report the installed PureCN version and terminate. Printing the
# version is a successful outcome, so exit with status 0 rather than signalling
# a failure to the calling shell or pipeline.
if (opt$version) {
    message(as.character(packageVersion("PureCN")))
    q(status=0)
}

# Make the run reproducible when a seed was requested
if (!is.null(opt$seed)) {
    set.seed(opt$seed)
}
    
# Copy the options used throughout the script into plain variables.

# Coverage inputs. `[[` matches names exactly; `$` on a list does partial
# matching and other options also start with "normal" (--normaldb,
# --normal_panel).
tumor.coverage.file <- opt$tumor
normal.coverage.file <- opt[["normal"]]

# --snpblacklist may hold several comma-separated entries; split them into a
# character vector and keep NULL when the option was not given.
snp.blacklist <- if (is.null(opt$snpblacklist)) NULL else
    strsplit(opt$snpblacklist, ",")[[1]]

# Remaining inputs
seg.file <- opt$segfile
normalDB <- opt$normaldb
file.rds <- opt$rds
pool <- opt$pool

# Sample name and output prefix; exact matching again because --outvcf shares
# the "out" prefix.
sampleid <- opt$sampleid
out <- opt[["out"]]

# Build the prefix used for all output files.
#
# out:      value of --out; either a file name prefix or a directory.
# sampleid: sample id, appended when `out` is an existing directory.
#
# Returns file.path(out, sampleid) if `out` is an existing directory,
# otherwise `out` unchanged. Stops with an explicit message when `out` is
# NULL, instead of the cryptic error file.info() raises for a NULL argument.
.getFilePrefix <- function(out, sampleid) {
    if (is.null(out)) stop("Need --out")
    # isdir is NA for paths that do not exist; isTRUE() treats that as FALSE
    if (isTRUE(file.info(out)$isdir)) return(file.path(out, sampleid))
    out
}

# Decide where results are read from and written to.
if (!is.null(file.rds) && file.exists(file.rds)) {
    # Re-using an existing RDS file: default the prefix to the RDS file name
    # without its extension. The dot is escaped so that only a literal ".rds"
    # suffix is removed.
    if (is.null(out)) out <- sub("\\.rds$", "", file.rds)
} else {
    # Fresh run: check the required arguments up front so that the user gets
    # an explicit message instead of a low-level error further down.
    if (is.null(sampleid)) stop("Need --sampleid.")
    if (is.null(opt$genome)) stop("Need --genome")
    if (is.null(out)) stop("Need --out")
    out <- .getFilePrefix(out, sampleid)
    file.rds <- paste0(out, ".rds")
    # Tumor coverage is only resolved when no segmentation file was supplied
    if (is.null(seg.file)) {
        if (is.null(tumor.coverage.file)) stop("Need --tumor")
        tumor.coverage.file <- normalizePath(tumor.coverage.file,
            mustWork=TRUE)
    }
}

# Called for its check only: with mustWork=TRUE this fails when the directory
# that should receive the output files does not exist.
normalizePath(dirname(out), mustWork=TRUE)

# PureCN is attached only now, after the command line has been validated.
flog.info("Loading PureCN...")
suppressPackageStartupMessages(library(PureCN))
library(futile.logger)

# Any non-empty PURECN_DEBUG environment variable turns on debug logging and
# the extra debug output written later in the script.
debug <- nzchar(Sys.getenv("PURECN_DEBUG"))
if (debug) invisible(flog.threshold("DEBUG"))

### Run PureCN ----------------------------------------------------------------

if (file.exists(file.rds) && !opt$force) {
    # Results are already on disk: load them instead of recomputing
    flog.info("%s already exists. Skipping... (--force will overwrite)", 
        file.rds)
    ret <- readCurationFile(file.rds)
    if (is.null(sampleid)) sampleid <- ret$input$sampleid
} else {
    if (!is.null(normalDB)) {
        if (!is.null(seg.file)) stop("normalDB and segfile do not work together.")
        # From here on normalDB holds the loaded object, not the file name
        normalDB <- readRDS(normalDB)
    }

    # Determine the normal coverage to use.
    #
    # normal.coverage.file: value of --normal, may be NULL.
    #
    # Returns the given file when present; otherwise the result of
    # findBestNormal() when a normal database is available; otherwise NULL
    # when a segmentation file was provided. Stops when none of these apply.
    # Reads normalDB, pool, tumor.coverage.file and seg.file from the script
    # environment.
    .getNormalCoverage <- function(normal.coverage.file) {
        if (!is.null(normal.coverage.file)) return(normal.coverage.file)
        if (!is.null(normalDB)) {
            # --pool carries the number of normals; findBestNormal() takes
            # the pooling switch and the count as separate arguments
            use.pool <- !is.null(pool)
            num.normals <- if (use.pool) pool else 1
            return(findBestNormal(tumor.coverage.file, normalDB,
                pool=use.pool, num.normals=num.normals))
        }
        if (is.null(seg.file)) {
            stop("Need either normalDB or normal.coverage.file")
        }
        NULL
    }
    normal.coverage.file <- .getNormalCoverage(normal.coverage.file)
    file.log <- paste0(out, '.log')

    # Derive and validate all run parameters before the PDF device is opened,
    # so that an invalid option does not leave a stray PDF file behind.
    af.range <- c(opt$minaf, 1-opt$minaf)
    test.purity <- seq(opt$minpurity, opt$maxpurity, by = 0.01)
    fun.segmentation <- switch(opt$funsegmentation,
        CBS = segmentationCBS,
        PSCBS = segmentationPSCBS,
        # "none": hand the provided segmentation back unchanged
        none = function(seg, ...) seg,
        stop("Unknown segmentation function"))

    # Plots produced during the run (plot.cnv=TRUE) go to this PDF
    pdf(paste0(out, '_segmentation.pdf'), width=10, height=11)
    ret <- runAbsoluteCN(normal.coverage.file=normal.coverage.file, 
            tumor.coverage.file=tumor.coverage.file, vcf.file=opt$vcf,
            sampleid=sampleid, gc.gene.file=opt$gcgene, plot.cnv=TRUE,
            genome=opt$genome, seg.file=seg.file,
            test.purity=test.purity,
            sex=opt$sex,
            args.filterVcf=list(snp.blacklist=snp.blacklist, 
                af.range=af.range, stats.file=opt$statsfile), 
            fun.segmentation=fun.segmentation,    
            args.segmentation=list(target.weight.file=opt$targetweightfile, 
                alpha=opt$alpha), 
            args.setMappingBiasVcf=
                list(normal.panel.vcf.file=opt$normal_panel),
            normalDB=normalDB, model.homozygous=opt$modelhomozygous,
            min.ploidy=opt$minploidy, max.ploidy=opt$maxploidy, 
            model=opt[["model"]], log.file=file.log, 
            max.segments=opt$maxsegments,
            error=opt$error, 
            log.ratio.calibration=opt$logratiocalibration,
            post.optimize=opt$postoptimize)
    dev.off()
    saveRDS(ret, file=file.rds)
}

### Create output files -------------------------------------------------------

# Names of the plot files written by this section
file.pdf <- paste0(out, '.pdf')
file.png <- paste0(out, '_contamination.png')

curationFile <- createCurationFile(file.rds)

# In debug mode, write the curation table extended by a few diagnostics taken
# from the first result to a separate CSV file.
if (debug) {
    first.result <- ret$results[[1]]
    curationFile$log.ratio.offset <- mean(first.result$log.ratio.offset)
    curationFile$log.ratio.sdev <- ret$input$log.ratio.sdev
    curationFile$num.segments <- nrow(first.result$seg)
    write.csv(curationFile, file=paste0(out, '_debug.csv'), row.names=FALSE)
}

# PDF with plots of type 'all'
pdf(file.pdf, width=10, height=11)
plotAbs(ret, type='all')
dev.off()

# PNG with the 'contamination' plot for result 1
png(file.png, width=800)
plotAbs(ret, 1, type='contamination')
dev.off()

# Variant-level outputs are only written when the results contain a VCF, the
# same condition that guards the per-chromosome plots at the end of the script.
# NOTE(review): predictSomatic() and callLOH() presumably cannot work without
# variant data, which would abort the script before the segmentation and gene
# files are written - confirm against the PureCN documentation.
if (!is.null(ret$input$vcf)) {
    if (opt$outvcf) {
        # Input VCF annotated by predictSomatic(); fields prefixed "PureCN."
        file.vcf <- paste0(out, '.vcf')
        vcfanno <- predictSomatic(ret, return.vcf=TRUE, 
            vcf.field.prefix="PureCN.")
        writeVcf(vcfanno, file=file.vcf)    
    }

    # Table returned by predictSomatic(), with the sample id as first column
    file.csv <- paste0(out, '_variants.csv')
    write.csv(cbind(Sampleid=sampleid, predictSomatic(ret)), file=file.csv, 
        row.names=FALSE, quote=FALSE)

    # Table returned by callLOH(), with the sample id as first column
    file.loh <- paste0(out, '_loh.csv')
    write.csv(cbind(Sampleid=sampleid, callLOH(ret)), file=file.loh, 
        row.names=FALSE, quote=FALSE)
} else if (opt$outvcf) {
    flog.warn("--outvcf ignored: results do not contain a VCF.")
}

# Segmentation of the first result as a tab-separated file: the first six
# columns plus the column named "C".
file.seg <- paste0(out, '_dnacopy.seg')
seg <- ret$results[[1]]$seg
keep.cols <- c(seq_len(6), match("C", colnames(seg)))
seg <- seg[, keep.cols]
write.table(seg, file=file.seg, sep="\t", quote=FALSE, row.names=FALSE)

# Gene-level table from callAlterations(); the row names are written out as
# an explicit gene.symbol column next to the sample id.
file.genes <- paste0(out, '_genes.csv')
allAlterations <- callAlterations(ret, all.genes=TRUE)
genes.table <- cbind(Sampleid=sampleid,
    gene.symbol=rownames(allAlterations), allAlterations)
write.csv(genes.table, file=file.genes, row.names=FALSE, quote=FALSE)

# One 'BAF' plot per chromosome, collected in a single PDF. Only possible
# when the results contain a VCF.
if (!is.null(ret$input$vcf)) {
    file.pdf <- paste0(out, '_chromosomes.pdf')
    pdf(file.pdf, width=9, height=10)
    # Restrict the VCF to the ids stored with the first result, then take the
    # sequence levels still in use, ordered by orderSeqlevels()
    vcf.ids <- ret$results[[1]]$SNV.posterior$vcf.ids
    vcf <- ret$input$vcf[vcf.ids]
    chromosomes <- seqlevelsInUse(vcf)
    chromosomes <- chromosomes[orderSeqlevels(chromosomes)]
    for (chrom in chromosomes) plotAbs(ret, 1, type='BAF', chr=chrom)
    dev.off()
}
