processArrayData {arrayMagic} | R Documentation |
Automated processing of image analysis result files and related annotation information. Combines several steps of microarray data processing; read the package vignette for a detailed example. To access the vignette, call openVignette() after calling library(arrayMagic).
processArrayData(spotIdentifier = "Name", verbose = TRUE, loadPath = ".", slideDescriptionFile = "slideDescription.txt", deleteBlanks = TRUE, normalisationMethod = "vsn", subtractBackground=FALSE, spotsRemovedBeforeNormalisation = NULL, spotsRemovedAfterNormalisation = NULL, subGroups = NULL, channelsSeparately = FALSE, hybridisationGroups = NULL, savePath = ".", objectsFileName, plotOutput = "screen", fileNameColumn = "fileName", slideNameColumn, type = "GenePix", dataColumns = NULL, spotAnnoColumns = NULL, channelColumn = NULL, removePatterns = NULL, skip = NULL, ...)
spotIdentifier |
character string; required; default "Name".
spotIdentifier specifies the column
in the image analysis result files which contains
spot or gene identifiers |
verbose |
logical; required; default: TRUE |
loadPath |
character string; required; default: ".".
The path is used for loading of the slideDescriptionFile
and the image analysis result files;
note: "." refers to the working directory.
|
slideDescriptionFile |
character string;
required; default "slideDescription.txt".
The first line of the tab-delimited file
must contain all column names, i.e. a header
line. In particular it must contain the
column named fileNameColumn and possibly
additionally a column named slideNameColumn . |
deleteBlanks |
logical; required; default: TRUE .
If set to TRUE any blank character (space)
is removed from the text body of the
slideDescriptionFile . |
fileNameColumn |
character string; required; default: "fileName".
fileNameColumn specifies the column which
contains the names of all
image analysis result files. |
slideNameColumn |
character string; optional; default missing.
If slideNameColumn is missing the value is set
to fileNameColumn . |
channelColumn |
optional; cf. readIntensities ;
default: NULL |
type |
character string to characterize
the file type like "GenePix" or "generic";
note e.g. "generic" requires the arguments dataColumns
and spotAnnoColumns ; default: "GenePix"; cf.
readIntensities for details |
dataColumns |
required for type "generic";
cf. readIntensities ; default: NULL |
spotAnnoColumns |
required for type "generic";
cf. readIntensities ; default: NULL |
removePatterns |
optional; cf. readIntensities ;
default: NULL |
skip |
optional; cf. readIntensities ;
default: NULL |
... |
further arguments which are passed to
readIntensities and eventually are
passed to read.table .
|
normalisationMethod |
character string; required; default: "vsn";
cf. argument method of normalise |
subtractBackground |
logical; required; default: FALSE ;
cf. normalise |
spotsRemovedBeforeNormalisation |
vector of character strings;
required; default: NULL .
All spots which match the string(s) will be excluded
already before normalisation and will not be present
in the resulting objects at all.
|
spotsRemovedAfterNormalisation |
vector of character strings;
required; default: NULL ;
cf. argument spotsRemovedBeforeNormalisation .
|
subGroups |
character string or NULL ;
required; cf. normalise ;
default: NULL |
channelsSeparately |
logical; required;
cf. normalise ;
default FALSE |
hybridisationGroups |
list of numeric vectors;
cf. normalise ;
required; default: NULL |
savePath |
character string; required; default: ".".
The directory savePath is used to store
the results (cf. argument objectsFileName )
and processed files.
If the path does not exist a directory is created;
note: "." refers to the working directory.
|
objectsFileName |
character string; optional;
default missing, i.e. no file is created.
objectsFileName specifies
the name of the file used to store the object
"resultList", i.e. the return value of the function.
|
plotOutput |
character string; required; default: "screen"; Possible values: "screen", "pdf" or "win.metafile". |
A list of objects, i.e. an "exprSetRGObject" and an
"arrayDataObject" with corresponding class types
exprSetRG-class
and
arrayData-class
(cf. the result of processArrayDataObject
).
Side-effects: The result list "resultList" is stored
as file objectsFileName
in the directory savePath
if the argument objectsFileName
is supplied.
The slideDescriptionFile
is stored
with suffix "_processed"
in the directory savePath
.
Andreas Buness <a.buness@dkfz.de>
readpDataSlides
,
readIntensities
,
processArrayDataObject
,
normalise
,
exprSetRG-class
,
arrayData-class
# Example data shipped with the package; results go to a temporary directory.
# path.package() replaces the defunct .path.package(); it requires the
# package to be attached, i.e. library(arrayMagic) must have been called.
LOADPATH <- file.path(path.package("arrayMagic"), "extdata")
SAVEPATH <- tempdir()
SLIDEDESCRIPTIONFILE <- "slideDescription"

# --- Default processing (GenePix files, vsn normalisation) ---
resultList <- processArrayData(
  loadPath = LOADPATH,
  savePath = SAVEPATH,
  slideDescriptionFile = SLIDEDESCRIPTIONFILE
)

# Downstream use of the two objects contained in the result list.
writeToFile(
  arrayDataObject = resultList$arrayDataObject,
  exprSetRGObject = resultList$exprSetRGObject,
  fileName = "normalisedData.txt",
  savePath = SAVEPATH
)
summarizedResult <- slideMerge(
  exprSetRGObject = resultList$exprSetRGObject,
  slideMergeColumn = "replicates"
)
qPL <- qualityParameters(
  arrayDataObject = resultList$arrayDataObject,
  exprSetRGObject = resultList$exprSetRGObject
)
visualiseQualityParameters(
  qualityParameters = qPL$qualityParameters,
  savePath = tempdir()
)
qualityDiagnostics(
  arrayDataObject = resultList$arrayDataObject,
  exprSetRGObject = resultList$exprSetRGObject,
  qualityParametersList = qPL,
  groupingColumn = "replicates",
  slideNameColumn = "fileName",
  savePath = tempdir(),
  plotOutput = "pdf"
)
# processArrayData stores the slide description with suffix "_processed"
# in savePath; remove it before the next run.
unlink(file.path(SAVEPATH, paste(SLIDEDESCRIPTIONFILE, "_processed", sep = "")))

# --- Hybridisation groups given as a list of slide index vectors ---
resultListG <- processArrayData(
  loadPath = LOADPATH,
  savePath = SAVEPATH,
  slideDescriptionFile = SLIDEDESCRIPTIONFILE,
  plotOutput = "pdf",
  hybridisationGroups = list((1:4), (5:9))
)
unlink(file.path(SAVEPATH, paste(SLIDEDESCRIPTIONFILE, "_processed", sep = "")))

# --- Hybridisation groups given as "slideBySlide"; the result list is
# --- additionally stored as file objectsFileName in savePath ---
resultListG2 <- processArrayData(
  loadPath = LOADPATH,
  savePath = SAVEPATH,
  slideDescriptionFile = SLIDEDESCRIPTIONFILE,
  plotOutput = "pdf",
  objectsFileName = "exprSetRG.RData",
  hybridisationGroups = "slideBySlide"
)
unlink(file.path(SAVEPATH, paste(SLIDEDESCRIPTIONFILE, "_processed", sep = "")))

# --- Type "genericOneFilePerChannel", no normalisation ---
SLIDEDESCRIPTIONFILE <- "genericChannelsPerFile"
spotAnnoColumns <- c("Index", "Label", "Type", "Name", "ID")
dataColumns <- c("Normalized....", "Average....", "Normalized....", "Average....")
names(dataColumns) <- c(
  "greenForeground", "greenBackground",
  "redForeground", "redBackground"
)
resultGenericChannel <- processArrayData(
  spotIdentifier = "Index",
  loadPath = LOADPATH,
  savePath = SAVEPATH,
  slideDescriptionFile = SLIDEDESCRIPTIONFILE,
  normalisationMethod = "none",
  channelColumn = "channel",
  fileNameColumn = "files",
  slideNameColumn = "name",
  type = "genericOneFilePerChannel",
  spotAnnoColumns = spotAnnoColumns,
  dataColumns = dataColumns
)
unlink(file.path(SAVEPATH, paste(SLIDEDESCRIPTIONFILE, "_processed", sep = "")))

# --- Type "genericOneFilePerChannel", vsn with background subtraction ---
SLIDEDESCRIPTIONFILE <- "genericChannelsPerFileTwo"
dataColumns <- c("Integral..QL.", "Bkg..QL.", "Integral..QL.", "Bkg..QL.")
names(dataColumns) <- c(
  "greenForeground", "greenBackground",
  "redForeground", "redBackground"
)
resultGenericChannelTwo <- processArrayData(
  spotIdentifier = "ID",
  loadPath = LOADPATH,
  savePath = SAVEPATH,
  slideDescriptionFile = SLIDEDESCRIPTIONFILE,
  normalisationMethod = "vsn",
  channelColumn = "channel",
  fileNameColumn = "files",
  slideNameColumn = "name",
  subtractBackground = TRUE,
  type = "genericOneFilePerChannel",
  spotAnnoColumns = spotAnnoColumns,
  dataColumns = dataColumns
)
unlink(file.path(SAVEPATH, paste(SLIDEDESCRIPTIONFILE, "_processed", sep = "")))