# SCM Repository

[tm] Diff of /trunk/tm/R/distmeasure.R
 [tm] / trunk / tm / R / distmeasure.R

# Diff of /trunk/tm/R/distmeasure.R

trunk/R/textmin/R/distmeasure.R revision 53, Thu Aug 24 13:06:50 2006 UTC trunk/tm/R/distmeasure.R revision 693, Fri Dec 22 13:21:30 2006 UTC
# Line 1  Line 1
1  # Author: Ingo Feinerer  # Author: Ingo Feinerer
2
3  setGeneric("dissimilarity", function(x, y, method) standardGeneric("dissimilarity"))  setGeneric("dissimilarity", function(x, y = NULL, method) standardGeneric("dissimilarity"))
4  setMethod("dissimilarity",  setMethod("dissimilarity",
5            signature(x = "TermDocMatrix", y = "ANY", method = "character"),            signature(x = "TermDocMatrix", y = "ANY", method = "character"),
6            function(x, y = NULL, method = "cosine") {            function(x, y = NULL, method) {
7                type <- match.arg(method, c("cosine","pearson","extjacc"))                # Until factored out in a seperate package
8                  # use the \code{dists} function from the \pkg{cba} package
9                m <- matrix(0, nrow = nrow(x), ncol = nrow(x))                dists(x, y, method)
for (i in 1:nrow(m)) {
for (j in 1:ncol(m)) {
# Do not compute symmetric values
if (0 == m[j,i])
switch(type,
"cosine" = {
m[i,j] <- cosinus(x[i,],x[j,])
},
"pearson" = {
m[i,j] <- pearson(x[i,],x[j,])
},
"extjacc" = {
m[i,j] <- extjacc(x[i,],x[j,])
}
)
else
m[i,j] <- m[j,i]
}
}
d.vec <- as.vector(m)
d <- numeric(0)
a <- k <- 1
while (k <= length(d.vec)) {
if (k %% nrow(x) == 1) {
k <- k + a
a <- a + 1
}
if (k <= length(d.vec))
d <- c(d, d.vec[k])
k <- k + 1
}
attr(d, "Size") <- nrow(x)
attr(d, "Labels") <- dimnames(x)[[1]]
attr(d, "Diag") <- FALSE
attr(d, "Upper") <- FALSE
switch(type,
"cosine" = {
attr(d, "method") <- "cosine"
},
"pearson" = {
attr(d, "method") <- "pearson"
},
"extjacc" = {
attr(d, "method") <- "extjacc"
}
)
class(d) <- "dist"
return(1 - d)
10            })            })
11  setMethod("dissimilarity",  setMethod("dissimilarity",
12            c("TextDocument", "TextDocument", "character"),            signature(x = "TextDocument", y = "TextDocument", method = "character"),
13            function(x, y, method) {            function(x, y = NULL, method) {
14                tdm <- TermDocMatrix(as(list(x,y), "TextDocCol"))                tdm <- TermDocMatrix(as(list(x,y), "TextDocCol"))
15                dissim <- dissimilarity(tdm, method = method)                dissim <- dissimilarity(tdm, method = method)
class(dissim) <- "dist"
16                return(dissim)                return(dissim)
17            })            })

cosinus <- function(x, y) {
if (!(is.vector(x) && is.vector(y)))
stop("Invalid input")
crossprod(x, y) / sqrt(crossprod(x) * crossprod(y))
}

pearson <- function(x, y) {
if (!(is.vector(x) && is.vector(y)))
stop("Invalid input")

xa <- (x - mean(x))
xb <- (y - mean(y))

1/2 * ((crossprod(xa, xb) / sqrt(crossprod(xa) * crossprod(xb))) + 1)
}

extjacc <- function(x, y) {
if (!(is.vector(x) && is.vector(y)))
stop("Invalid input")
crossprod(x, y) / (crossprod(x) + crossprod(y) - crossprod(x, y))
}

Legend:
 Removed from v.53 changed lines Added in v.693