SCM

SCM Repository

[tm] Diff of /pkg/R/matrix.R
ViewVC logotype

Diff of /pkg/R/matrix.R

Parent Directory | Revision Log | View Patch

revision 1444, Mon Aug 22 11:50:13 2016 UTC | revision 1445, Sun Oct 9 09:30:58 2016 UTC
# Line 146  Line 146 
146  {  {
147      stopifnot(is.list(control))      stopifnot(is.list(control))
148    
149      tflist <- mclapply(unname(content(x)), termFreq, control)      tflist <- lapply(unname(content(x)), termFreq, control)
150    
151      v <- unlist(tflist)      v <- unlist(tflist)
152      i <- names(v)      i <- names(v)
153      terms <- sort(unique(as.character(if (is.null(control$dictionary)) i      terms <- sort(unique(as.character(if (is.null(control$dictionary)) i
154                                        else control$dictionary)))                                        else control$dictionary)))
155      i <- match(i, terms)      i <- match(i, terms)
156      j <- rep(seq_along(x), sapply(tflist, length))      j <- rep.int(seq_along(x), lengths(tflist))
157    
158      m <- .SimpleTripletMatrix(i, j, as.numeric(v), terms, x)      m <- .SimpleTripletMatrix(i, j, as.numeric(v), terms, x)
159      m <- filter_global_bounds(m, control$bounds$global)      m <- filter_global_bounds(m, control$bounds$global)
# Line 178  Line 178 
178  function(x, ...)  function(x, ...)
179  {  {
180      m <- simple_triplet_matrix(i = seq_along(x),      m <- simple_triplet_matrix(i = seq_along(x),
181                                 j = rep(1, length(x)),                                 j = rep_len(1L, length(x)),
182                                 v = as.numeric(x),                                 v = as.numeric(x),
183                                 nrow = length(x),                                 nrow = length(x),
184                                 ncol = 1,                                 ncol = 1,
# Line 453  Line 453 
453      j <- lapply(m, "[[", "j")      j <- lapply(m, "[[", "j")
454    
455      m <- simple_triplet_matrix(i = match(allTermsNonUnique, allTerms),      m <- simple_triplet_matrix(i = match(allTermsNonUnique, allTerms),
456                                 j = unlist(j) + rep.int(cs, sapply(j, length)),                                 j = unlist(j) + rep.int(cs, lengths(j)),
457                                 v = unlist(lapply(m, "[[", "v")),                                 v = unlist(lapply(m, "[[", "v")),
458                                 nrow = length(allTerms),                                 nrow = length(allTerms),
459                                 ncol = length(allDocs),                                 ncol = length(allDocs),
# Line 462  Line 462 
462                                      Docs = allDocs))                                      Docs = allDocs))
463      ## <NOTE>      ## <NOTE>
464      ## - We assume that all arguments have the same weighting      ## - We assume that all arguments have the same weighting
465      ## - Even if all matrices have the same input weighting it might be necessary      ## - Even if all matrices have the same input weighting it might be
466      ##   to take additional steps (e.g., normalization for tf-idf or check for      ##   necessary to take additional steps (e.g., normalization for tf-idf or
467      ##   (0,1)-range for binary tf)      ##   check for (0,1)-range for binary tf)
468      ## </NOTE>      ## </NOTE>
469      .TermDocumentMatrix(m, weighting)      .TermDocumentMatrix(m, weighting)
470  }  }

Legend:
Removed from v.1444  
changed lines
  Added in v.1445

R-Forge@R-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business University of Wisconsin - Madison Powered By FusionForge