SCM

SCM Repository

[tm] View of /trunk/tm/R/distmeasure.R
ViewVC logotype

View of /trunk/tm/R/distmeasure.R

Parent Directory Parent Directory | Revision Log Revision Log


Revision 53 - (download) (annotate)
Thu Aug 24 13:06:50 2006 UTC (12 years, 11 months ago) by feinerer
Original Path: trunk/R/textmin/R/distmeasure.R
File size: 3115 byte(s)
See ChangeLog for changes.
# Author: Ingo Feinerer

setGeneric("dissimilarity", function(x, y, method) standardGeneric("dissimilarity"))
setMethod("dissimilarity",
          signature(x = "TermDocMatrix", y = "ANY", method = "character"),
          function(x, y = NULL, method = "cosine") {
              type <- match.arg(method, c("cosine","pearson","extjacc"))

              m <- matrix(0, nrow = nrow(x), ncol = nrow(x))
              for (i in 1:nrow(m)) {
                  for (j in 1:ncol(m)) {
                      # Do not compute symmetric values
                      if (0 == m[j,i])
                          switch(type,
                                 "cosine" = {
                                     m[i,j] <- cosinus(x[i,],x[j,])
                                 },
                                 "pearson" = {
                                     m[i,j] <- pearson(x[i,],x[j,])
                                 },
                                 "extjacc" = {
                                     m[i,j] <- extjacc(x[i,],x[j,])
                                 }
                                 )
                      else
                          m[i,j] <- m[j,i]
                  }
              }
              d.vec <- as.vector(m)
              d <- numeric(0)
              a <- k <- 1
              while (k <= length(d.vec)) {
                  if (k %% nrow(x) == 1) {
                      k <- k + a
                      a <- a + 1
                  }
                  if (k <= length(d.vec))
                      d <- c(d, d.vec[k])
                  k <- k + 1
              }
              attr(d, "Size") <- nrow(x)
              attr(d, "Labels") <- dimnames(x)[[1]]
              attr(d, "Diag") <- FALSE
              attr(d, "Upper") <- FALSE
              switch(type,
                     "cosine" = {
                         attr(d, "method") <- "cosine"
                     },
                     "pearson" = {
                         attr(d, "method") <- "pearson"
                     },
                     "extjacc" = {
                         attr(d, "method") <- "extjacc"
                     }
                     )
              class(d) <- "dist"
              return(1 - d)
          })
setMethod("dissimilarity",
          c("TextDocument", "TextDocument", "character"),
          function(x, y, method) {
              tdm <- TermDocMatrix(as(list(x,y), "TextDocCol"))
              dissim <- dissimilarity(tdm, method = method)
              class(dissim) <- "dist"
              return(dissim)
          })

cosinus <- function(x, y) {
    if (!(is.vector(x) && is.vector(y)))
        stop("Invalid input")
    crossprod(x, y) / sqrt(crossprod(x) * crossprod(y))
}

pearson <- function(x, y) {
    if (!(is.vector(x) && is.vector(y)))
        stop("Invalid input")

    xa <- (x - mean(x))
    xb <- (y - mean(y))

    1/2 * ((crossprod(xa, xb) / sqrt(crossprod(xa) * crossprod(xb))) + 1)
}

extjacc <- function(x, y) {
    if (!(is.vector(x) && is.vector(y)))
        stop("Invalid input")
    crossprod(x, y) / (crossprod(x) + crossprod(y) - crossprod(x, y))
}

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge