SCM

SCM Repository

[tm] Diff of /pkg/R/corpus.R
ViewVC logotype

Diff of /pkg/R/corpus.R

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1313, Sun Mar 30 09:28:00 2014 UTC revision 1327, Mon Apr 14 15:35:38 2014 UTC
# Line 20  Line 20 
20      db <- filehash::dbInit(dbControl$dbName, dbControl$dbType)      db <- filehash::dbInit(dbControl$dbName, dbControl$dbType)
21    
22      # Allocate memory in advance if length is known      # Allocate memory in advance if length is known
23      tdl <- if (x$length > 0)      tdl <- if (x$length > 0) vector("list", as.integer(x$length)) else list()
         vector("list", as.integer(x$length))  
     else  
         list()  
24    
25      counter <- 1      counter <- 1
26      while (!eoi(x)) {      while (!eoi(x)) {
# Line 63  Line 60 
60          on.exit(readerControl$exit())          on.exit(readerControl$exit())
61    
62      # Allocate memory in advance if length is known      # Allocate memory in advance if length is known
63      tdl <- if (x$length > 0)      tdl <- if (x$length > 0) vector("list", as.integer(x$length)) else list()
         vector("list", as.integer(x$length))  
     else  
         list()  
64    
65      if (x$vectorized)      if (x$vectorized)
66          tdl <- mapply(function(elem, id)          tdl <- mapply(function(elem, id)
# Line 102  Line 96 
96                class = c("VCorpus", "Corpus"))                class = c("VCorpus", "Corpus"))
97  }  }
98    
99  `[.PCorpus` <- `[.VCorpus` <-  `[.PCorpus` <-
100  function(x, i)  function(x, i)
101  {  {
102      if (!missing(i)) {      if (!missing(i)) {
# Line 112  Line 106 
106      x      x
107  }  }
108    
109    `[.VCorpus` <-
110    function(x, i)
111    {
112        if (!missing(i)) {
113            x$content <- x$content[i]
114            x$dmeta <- x$dmeta[i, , drop = FALSE]
115            if (!is.null(x$lazy))
116                x$lazy$index <- x$lazy$index[i]
117        }
118        x
119    }
120    
121  .map_name_index <-  .map_name_index <-
122  function(x, i)  function(x, i)
123  {  {
# Line 132  Line 138 
138  function(x, i)  function(x, i)
139  {  {
140      i <- .map_name_index(x, i)      i <- .map_name_index(x, i)
141      lazyTmMap <- meta(x, tag = "lazyTmMap", type = "corpus")      if (!is.null(x$lazy))
142      if (!is.null(lazyTmMap))          .Call(copyCorpus, x, materialize(x, i))
         .Call("copyCorpus", x, materialize(x, i))  
143      x$content[[i]]      x$content[[i]]
144  }  }
145    
# Line 150  Line 155 
155  function(x, i, value)  function(x, i, value)
156  {  {
157      i <- .map_name_index(x, i)      i <- .map_name_index(x, i)
158      # Mark new objects as not active for lazy mapping      # Mark new objects as inactive for lazy mapping
159      lazyTmMap <- meta(x, tag = "lazyTmMap", type = "corpus")      if (!is.null(x$lazy))
160      if (!is.null(lazyTmMap)) {          x$lazy$index[i] <- FALSE
         lazyTmMap$index[i] <- FALSE  
         meta(x, tag = "lazyTmMap", type = "corpus") <- lazyTmMap  
     }  
161      x$content[[i]] <- value      x$content[[i]] <- value
162      x      x
163  }  }
164    
165  # Update NodeIDs of a CMetaData tree  outer_union <-
166  .update_id <-  function(x, y, ...)
 function(x, id = 0, mapping = NULL, left.mapping = NULL, level = 0)  
 {  
     # Traversal of (binary) CMetaData tree with setup of NodeIDs  
     set_id <- function(x) {  
         x$NodeID <- id  
         id <<- id + 1  
         level <<- level + 1  
         if (length(x$Children)) {  
             mapping <<- cbind(mapping, c(x$Children[[1]]$NodeID, id))  
             left <- set_id(x$Children[[1]])  
             if (level == 1) {  
                 left.mapping <<- mapping  
                 mapping <<- NULL  
             }  
             mapping <<- cbind(mapping, c(x$Children[[2]]$NodeID, id))  
             right <- set_id(x$Children[[2]])  
   
             x$Children <- list(left, right)  
         }  
         level <<- level - 1  
         x  
     }  
     list(root = set_id(x), left.mapping = left.mapping, right.mapping = mapping)  
 }  
   
 # Find indices to be updated for a CMetaData tree  
 .find_indices <-  
 function(x)  
167  {  {
168      indices.mapping <- NULL      if (nrow(x) > 0L)
169      for (m in levels(as.factor(CorpusDMeta(x)$MetaID))) {          x[, setdiff(names(y), names(x))] <- NA
170          indices <- (CorpusDMeta(x)$MetaID == m)      if (nrow(y) > 0L)
171          indices.mapping <- c(indices.mapping, list(m = indices))          y[, setdiff(names(x), names(y))] <- NA
172          names(indices.mapping)[length(indices.mapping)] <- m      res <- rbind(x, y)
173      }      if (ncol(res) == 0L)
174      indices.mapping          res <- data.frame(row.names = seq_len(nrow(x) + nrow(y)))
175  }      res
176    }
 #c2 <-  
 #function(x, y, ...)  
 #{  
 #    # Update the CMetaData tree  
 #    cmeta <- .MetaDataNode(0, list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME")), list(CMetaData(x), CMetaData(y)))  
 #    update.struct <- .update_id(cmeta)  
 #  
 #    new <- .VCorpus(c(unclass(x), unclass(y)), update.struct$root, NULL)  
 #  
 #    # Find indices to be updated for the left tree  
 #    indices.mapping <- .find_indices(x)  
 #  
 #    # Update the CorpusDMeta data frames for the left tree  
 #    for (i in 1:ncol(update.struct$left.mapping)) {  
 #        map <- update.struct$left.mapping[,i]  
 #        DMetaData(x)$MetaID <- replace(DMetaData(x)$MetaID, indices.mapping[[as.character(map[1])]], map[2])  
 #    }  
 #  
 #    # Find indices to be updated for the right tree  
 #    indices.mapping <- .find_indices(y)  
 #  
 #    # Update the CorpusDMeta data frames for the right tree  
 #    for (i in 1:ncol(update.struct$right.mapping)) {  
 #        map <- update.struct$right.mapping[,i]  
 #        DMetaData(y)$MetaID <- replace(DMetaData(y)$MetaID, indices.mapping[[as.character(map[1])]], map[2])  
 #    }  
 #  
 #    # Merge the CorpusDMeta data frames  
 #    labels <- setdiff(names(DMetaData(y)), names(DMetaData(x)))  
 #    na.matrix <- matrix(NA,  
 #                        nrow = nrow(DMetaData(x)),  
 #                        ncol = length(labels),  
 #                        dimnames = list(row.names(DMetaData(x)), labels))  
 #    x.dmeta.aug <- cbind(DMetaData(x), na.matrix)  
 #    labels <- setdiff(names(DMetaData(x)), names(DMetaData(y)))  
 #    na.matrix <- matrix(NA,  
 #                        nrow = nrow(DMetaData(y)),  
 #                        ncol = length(labels),  
 #                        dimnames = list(row.names(DMetaData(y)), labels))  
 #    y.dmeta.aug <- cbind(DMetaData(y), na.matrix)  
 #    DMetaData(new) <- rbind(x.dmeta.aug, y.dmeta.aug)  
 #  
 #    new  
 #}  
177    
178  c.VCorpus <-  c.VCorpus <-
179  function(..., recursive = FALSE)  function(..., recursive = FALSE)
# Line 257  Line 187 
187      if (!all(unlist(lapply(args, inherits, class(x)))))      if (!all(unlist(lapply(args, inherits, class(x)))))
188          stop("not all arguments are of the same corpus type")          stop("not all arguments are of the same corpus type")
189    
190      if (recursive)      structure(list(content = do.call("c", lapply(args, content)),
191          Reduce(c2, args)                     meta = structure(do.call("c",
192      else {                       lapply(args, function(a) meta(a, type = "corpus"))),
193          args <- do.call("c", lapply(args, content))                                      class = "CorpusMeta"),
194          structure(list(content = args,                     dmeta = Reduce(outer_union, lapply(args, meta))),
                        meta = CorpusMeta(),  
                        dmeta = data.frame(row.names = seq_along(args))),  
195                    class = c("VCorpus", "Corpus"))                    class = c("VCorpus", "Corpus"))
196      }      }
 }  
197    
198  c.TextDocument <-  c.TextDocument <-
199  function(..., recursive = FALSE)  function(..., recursive = FALSE)
# Line 293  Line 220 
220  content.VCorpus <-  content.VCorpus <-
221  function(x)  function(x)
222  {  {
223      lazyTmMap <- meta(x, tag = "lazyTmMap", type = "corpus")      if (!is.null(x$lazy))
224      if (!is.null(lazyTmMap))          .Call(copyCorpus, x, materialize(x))
         .Call("copyCorpus", x, materialize(x))  
225      x$content      x$content
226  }  }
227    

Legend:
Removed from v.1313  
changed lines
  Added in v.1327

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge