\encoding{UTF-8}
\name{smooth.roc}
\alias{smooth}
\alias{smooth.roc}
\alias{smooth.smooth.roc}
\alias{smooth.default}
\title{
 Smooth a ROC curve
}
\description{
  This function smooths a ROC curve. By default, a binormal smoothing
  is performed, but density or custom smoothings are supported.
}
\usage{
smooth(x, ...)
\S3method{smooth}{default}(x, ...)
\S3method{smooth}{roc}(roc,
method=c("binormal", "density", "fitdistr"), n=512, bandwidth = "nrd",
density=NULL, density.controls=density, density.cases=density,
reuse.auc=TRUE, reuse.ci=TRUE, ...)
\S3method{smooth}{smooth.roc}(smooth.roc, ...)
}

\arguments{
  \item{x}{a roc object from the \link{roc} function (for smooth.roc),
    or a vector (for the regular (s+ default) smooth function).
  }
  \item{roc, smooth.roc}{a \dQuote{roc} object from the
	\code{\link{roc}} function, or a \dQuote{smooth.roc} object from the
	\code{\link{smooth.roc}} function.
  }
  \item{method}{\dQuote{binormal}, \dQuote{density}, \dQuote{fitdistr},
    or a function returning a list of smoothed sensitivities and specificities.
  }
  \item{n}{
    the number of equally spaced points where the smoothed curve will be
    calculated.
  }
  \item{bandwidth}{
    if \code{method="density"} and \code{density.controls} and
    \code{density.cases} are not provided, \code{bandwidth} is passed as
    \code{width} to \code{\link{density}} to determine the bandwidth of the
    density. It can be a character string (\dQuote{nrd}, \dQuote{hb},
    \dQuote{ucv}, \dQuote{bcv} or \dQuote{sj}, but any name
    matching a function prefixed with \dQuote{bandwidth.} is
    supported) or a numeric value, as described in
    \code{\link{density}}.
    Defaults to \dQuote{\link[bandwidth]{nrd}}.
  }
  \item{density, density.controls, density.cases}{if
    \code{method="density"}, a numeric value of density (over the y
    axis) or a function returning a density (such as
    \code{\link{density}}). If \code{method="fitdistr"}, one of
	\dQuote{normal} (default), \dQuote{exponential}, \dQuote{log-normal}
	(same as \dQuote{lognormal}) or \dQuote{uniform}, stating the shape of
	the underlying distribution.
    If the value is different for control and case observations,
    \code{density.controls} and \code{density.cases} can be employed
    instead, otherwise \code{density} will be propagated to both
	\code{density.controls} and \code{density.cases}.
  }
  \item{reuse.auc, reuse.ci}{if \code{TRUE} (default) and the \dQuote{roc} objects
    contain \dQuote{auc} or \dQuote{ci} fields, re-use these specifications for the
    smoothed ROC curve. If \code{FALSE}, the object returned will not contain
    \dQuote{auc} or \dQuote{ci} fields. It is currently not possible to
    redefine the options directly: you need to call \code{\link{auc}} or
    \code{\link{ci}} later.
  }
  \item{\dots}{further arguments passed to or from other methods, and
    especially to \code{\link{density}} (only \code{cut} and
    \code{window}, plus \code{kernel} and \code{adjust} for
    compatibility with R). Also passed to \code{method} if it
    is a function. 
  }
}

\details{
  If \code{method="binormal"}, a linear model is fitted to the quantiles of
  the sensitivities and specificities. Smoothed sensitivities and
  specificities are then generated from this model on \code{n} points.
  This simple approach was found to work well for most ROC curves, but
  it may produce hooked smooths in some situations (see Hanley (1988)).

  If \code{method="density"}, the \code{\link{density}}
  function is employed to generate a smooth
  density of the control and case observations, unless
  \code{density.controls} or \code{density.cases} are provided
  directly. Otherwise, \code{bandwidth} can be given to
  specify a bandwidth to use with \code{\link{density}}. It can be a
  numeric value or a character string (\dQuote{nrd}, \dQuote{hb}, 
  \dQuote{ucv}, \dQuote{bcv} or \dQuote{sj}, but any name
  matching a function prefixed with \dQuote{bandwidth.} is
  supported). In the case of a character
  string, the whole predictor data is employed to determine the numeric
  value to use on both controls and cases.
  Note that the \code{width} argument to \code{density} is here called
  \code{bandwidth} to avoid clashes with the \code{width} argument to
  \code{\link{plot.roc}}.
  Depending on your data, it might be a good idea to specify the
  \code{window} argument for \code{\link{density}}. By default,
  \dQuote{gaussian} is used, but \dQuote{cosine}, \dQuote{3gaussian},
  \dQuote{rectangular} and \dQuote{triangular} are supported. As all the
  window kernels are symmetrical, it might help to normalize the data first
  (that is, before calling \code{\link{roc}}), for example with quantile
  normalization:
  \preformatted{
    norm.x <- qnorm(rank(x)/(length(x)+1))
    smooth(roc(response, norm.x, ...), ...)
  }
  
  Additionally, \code{density} can be a function which must return
  either a numeric vector of densities over the y axis or a \link{list}
  with a \dQuote{y} item like the \code{\link{density}} function. It
  must accept the following input:
  \preformatted{
    density.fun(x, n, from, to, width, window, ...)
  }
  It is important to honour \code{n}, \code{from} and \code{to} in order
  to have the densities evaluated on the same points for controls and
  cases. Failing to do so and returning densities of different length
  will produce an error. It is also a good idea to use a constant
  smoothing parameter (such as \code{width}) especially when controls and
  cases have a different number of observations, to avoid producing
  smoother or rougher densities.

  If \code{method="fitdistr"}, a function similar to the \code{\link[MASS]{fitdistr}}
  function from the \pkg{MASS} package is employed to fit parameters for
  the density function \code{density}. The density functions are fitted
  separately in control (\code{density.controls})
  and case observations (\code{density.cases}). \code{density} can be
  one of the character values \dQuote{normal} (default), 
  \dQuote{exponential}, \dQuote{log-normal} or \dQuote{uniform}.
  No \code{start} parameter is supported, unlike
  \code{\link[MASS]{fitdistr}} in the \pkg{MASS} package.

  Finally, \code{method} can also be a function. It must
  return a list with exactly 2 elements named \dQuote{sensitivities} and
  \dQuote{specificities}, which must be numeric vectors between 0 and 1
  or 100 (depending on the \code{percent} argument to
  \code{\link{roc}}). It is passed all the arguments to the
  \code{smooth} function.

  \code{smooth.default} forces the usage of the default
  \code{\link[splus]{smooth}} function, so
  that other code relying on smooth should continue to function
  normally.

  Smoothed ROC curves can be passed to smooth again. In this case, the
  smoothing is not re-applied on the smoothed ROC curve but the
  original \dQuote{\link{roc}} object will be re-used.
}

\value{
  A list of class \dQuote{smooth.roc} with the following fields:
  \item{sensitivities}{the smoothed sensitivities defining the ROC curve.}
  \item{specificities}{the smoothed specificities defining the ROC curve.}
  \item{percent}{if the sensitivities, specificities and AUC are
    reported in percent, as defined in argument.
  }
  \item{direction}{the direction of the comparison, as defined in argument.}
  \item{thresholds}{the thresholds at which the sensitivities and
    specificities were computed.
  }
  \item{call}{how the function was called. See \code{\link{match.call}} for
    more details.
  }
  \item{smoothing.args}{a list of the arguments used for the
    smoothing. Will serve to apply the smoothing again in further
    bootstrap operations.
  }
  \item{fit.controls, fit.cases}{a list similar to a result of \pkg{MASS}'s
    \code{\link[MASS]{fitdistr}} function for controls and cases, but with
    only \dQuote{estimate}, and an additional \dQuote{densfun} item
    indicating the density function, if possible as character.
  }
  \item{auc}{if the original ROC curve contained an AUC, it is computed
    again on the smoothed ROC.
  }
  \item{ci}{if the original ROC curve contained a CI, it is computed
    again on the smoothed ROC.
  }
  Additionally, the original \code{\link{roc}} object is stored as a
  \dQuote{roc} attribute.
}

\section{Errors}{
  If \code{method} is a function, the return values will be checked
  thoroughly for validity (list with two numeric elements of the same
  length named \dQuote{sensitivities} and \dQuote{specificities} with
  values in the range of possible values for sensitivities and
  specificities).

  The message \dQuote{The 'density' function must return a numeric
    vector or a list with a 'y' item.} will be displayed if the
  \code{density} function did not return a valid output. The message
  \dQuote{Length of 'density.controls' and 'density.cases' differ.}
  will be displayed if the returned values differ in length.

  Binormal smoothing cannot smooth a ROC curve defined by only one
  point. Any such attempt will fail with the error \dQuote{ROC curve not
  smoothable (not enough points).}. It will also fail if the points are
  poorly distributed and no model can be fit. In such a case, the error
  from 'lm' is printed within the message.

  If the smooth ROC curve was generated by \code{\link{roc}} with
  \code{density.controls} and \code{density.cases} numeric arguments, it
  cannot be smoothed and the error \dQuote{Cannot smooth a ROC curve
    generated directly with numeric 'density.controls' and
    'density.cases'.} is produced.
}

\references{
  James E. Hanley (1988) \dQuote{The robustness of the \dQuote{binormal} assumptions
  used in fitting ROC curves}. \emph{Medical Decision Making} \bold{8}, 197--203.
}

\seealso{
 \code{\link{roc}}
}

\examples{
data(aSAH)

## Basic example

# Build the empirical ROC curve, then smooth it
# (the binormal method is the default)
rocobj <- roc(aSAH$outcome, aSAH$s100b)
smooth(rocobj)
# or directly with roc()
roc(aSAH$outcome, aSAH$s100b, smooth=TRUE)

# plotting: draw the empirical curve, then overlay one smoothed curve
# per method with add=TRUE
plot(rocobj)
rs <- smooth(rocobj, method="binormal")
plot(rs, add=TRUE, col="green")
rs2 <- smooth(rocobj, method="density")
plot(rs2, add=TRUE, col="blue")
rs3 <- smooth(rocobj, method="fitdistr", density="lognormal")
plot(rs3, add=TRUE, col="magenta")
# legend entries are listed in the order the curves were drawn
legend(.6, .4, legend=c("Empirical", "Binormal", "Density", "Log-normal"),
       col=c("black", "green", "blue", "magenta"), lwd=2)

## Advanced smoothing

# a different distribution can be fitted to controls and to cases:
smooth(rocobj, method="fitdistr",
       density.controls="normal", density.cases="lognormal")

# with pre-computed densities: controls and cases share one bandwidth and
# one evaluation range, so both density vectors cover the same points
width <- bandwidth.nrd(rocobj$predictor)
lower.bound <- min(rocobj$predictor) - 3 * width
upper.bound <- max(rocobj$predictor) + 3 * width
density.controls <- density(rocobj$controls, from=lower.bound, to=upper.bound,
                            width=width, window="gaussian")
density.cases <- density(rocobj$cases, from=lower.bound, to=upper.bound,
                         width=width, window="gaussian")
smooth(rocobj, method="density",
       density.controls=density.controls$y, density.cases=density.cases$y)
# which is roughly what is done by a simple:
smooth(rocobj, method="density")


## Smoothing artificial ROC curves

# two normals: both groups are drawn from unit-variance normal
# distributions, the second one shifted by 1
roc.norm <- roc(rep(c(0, 1), each=1000), 
                c(rnorm(1000), rnorm(1000)+1), plot=TRUE)
plot(smooth(roc.norm), col="green", lwd=1, add=TRUE)
plot(smooth(roc.norm, method="density"), col="red", lwd=1, add=TRUE)
plot(smooth(roc.norm, method="fitdistr"), col="blue", lwd=1, add=TRUE)
# one label, colour and line width per curve drawn above (4 curves)
legend(.6, .4, legend=c("empirical", "binormal", "density", "fitdistr"),
       col=c(par("fg"), "green", "red", "blue"), lwd=c(2, 1, 1, 1))
       
# deviation from the normality: the first group is drawn from a normal
# distribution, the second from an exponential distribution
roc.norm.exp <- roc(rep(c(0, 1), each=1000), 
                    c(rnorm(1000), rexp(1000)), plot=TRUE)
plot(smooth(roc.norm.exp), col="green", lwd=1, add=TRUE)
plot(smooth(roc.norm.exp, method="density"), col="red", lwd=1, add=TRUE)
# Wrong fitdistr: normality assumed by default
plot(smooth(roc.norm.exp, method="fitdistr"), col="blue", lwd=1, add=TRUE)
# Correct fitdistr: state the distribution of each group explicitly
plot(smooth(roc.norm.exp, method="fitdistr", density.controls="normal",
            density.cases="exponential"), col="purple", lwd=1, add=TRUE)
# one label, colour and line width per curve drawn above (5 curves)
legend(.6, .4, legend=c("empirical", "binormal", "density",
                        "wrong fitdistr", "correct fitdistr"),
       col=c(par("fg"), "green", "red", "blue", "purple"), lwd=c(2, 1, 1, 1, 1))


# large deviation from the normality: the first group is drawn from a
# uniform distribution on [-1, 1], the second from an exponential one
roc.unif.exp <- roc(rep(c(0, 1), each=1000), 
                    c(runif(1000, -1, 1), rexp(1000)), plot=TRUE)
plot(smooth(roc.unif.exp), col="green", lwd=1, add=TRUE)
plot(smooth(roc.unif.exp, method="density"), col="red", lwd=1, add=TRUE)
# Wrong fitdistr: normality assumed by default
plot(smooth(roc.unif.exp, method="fitdistr"), col="blue", lwd=1, add=TRUE)
# Correct fitdistr: state the distribution of each group explicitly
plot(smooth(roc.unif.exp, method="fitdistr", density.controls="uniform",
            density.cases="exponential"), col="purple", lwd=1, add=TRUE)
# one label, colour and line width per curve drawn above (5 curves)
legend(.6, .4, legend=c("empirical", "binormal", "density",
                        "wrong fitdistr", "correct fitdistr"),
       col=c(par("fg"), "green", "red", "blue", "purple"),
       lwd=c(2, 1, 1, 1, 1))

# 2 uniform distributions with a custom density function
# The function takes the arguments documented for a user-supplied density
# function (x, n, from, to, width, window, ...). It returns a uniform
# density, bounded by the range of x, evaluated on n equally spaced points
# between from and to. width and window are accepted but not used.
unif.density <- function(x, n, from, to, width, window, ...) {
  smooth.x <- seq(from=from, to=to, length.out=n)
  smooth.y <- dunif(smooth.x, min=min(x), max=max(x))
  return(smooth.y)
}
# simulate two uniform samples, on [-1, 1] and on [0, 2], and smooth the
# resulting ROC curve with the custom density function
roc.unif <- roc(rep(c(0, 1), each=1000),
                c(runif(1000, -1, 1), runif(1000, 0, 2)), plot=TRUE)
s <- smooth(roc.unif, method="density", density=unif.density)
# draw the empirical curve, then overlay the smoothed one
plot(roc.unif)
plot(s, add=TRUE, col="grey")

# you can bootstrap a ROC curve smoothed with a density function:
ci(s, boot.n=100)
}

\keyword{univar}
\keyword{nonparametric}
\keyword{utilities}
\keyword{roc}
\keyword{smooth}
