SCM

SCM Repository

[tm] Diff of /pkg/R/corpus.R
ViewVC logotype

Diff of /pkg/R/corpus.R

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1021, Tue Nov 17 16:37:22 2009 UTC revision 1108, Fri Oct 22 18:32:47 2010 UTC
# Line 10  Line 10 
10  DBControl <- function(x) attr(x, "DBControl")  DBControl <- function(x) attr(x, "DBControl")
11    
12  PCorpus <- function(x,  PCorpus <- function(x,
13                      readerControl = list(reader = x$DefaultReader, language = "eng"),                      readerControl = list(reader = x$DefaultReader, language = "en"),
14                      dbControl = list(dbName = "", dbType = "DB1"),                      dbControl = list(dbName = "", dbType = "DB1"),
15                      ...) {                      ...) {
16      readerControl <- prepareReader(readerControl, x$DefaultReader, ...)      readerControl <- prepareReader(readerControl, x$DefaultReader, ...)
# Line 29  Line 29 
29      while (!eoi(x)) {      while (!eoi(x)) {
30          x <- stepNext(x)          x <- stepNext(x)
31          elem <- getElem(x)          elem <- getElem(x)
32          doc <- readerControl$reader(elem, readerControl$language, as.character(counter))          doc <- readerControl$reader(elem, readerControl$language, if (is.null(x$Names)) as.character(counter) else x$Names[counter])
33          filehash::dbInsert(db, ID(doc), doc)          filehash::dbInsert(db, ID(doc), doc)
34          if (x$Length > 0) tdl[[counter]] <- ID(doc)          if (x$Length > 0) tdl[[counter]] <- ID(doc)
35          else tdl <- c(tdl, ID(doc))          else tdl <- c(tdl, ID(doc))
36          counter <- counter + 1          counter <- counter + 1
37      }      }
38        names(tdl) <- x$Names
39    
40      df <- data.frame(MetaID = rep(0, length(tdl)), stringsAsFactors = FALSE)      df <- data.frame(MetaID = rep(0, length(tdl)), stringsAsFactors = FALSE)
41      filehash::dbInsert(db, "DMetaData", df)      filehash::dbInsert(db, "DMetaData", df)
# Line 58  Line 59 
59    
60  # The "..." are additional arguments for the FunctionGenerator reader  # The "..." are additional arguments for the FunctionGenerator reader
61  VCorpus <- Corpus <- function(x,  VCorpus <- Corpus <- function(x,
62                      readerControl = list(reader = x$DefaultReader, language = "eng"),                                readerControl = list(reader = x$DefaultReader, language = "en"),
63                      ...) {                      ...) {
64      readerControl <- prepareReader(readerControl, x$DefaultReader, ...)      readerControl <- prepareReader(readerControl, x$DefaultReader, ...)
65    
# Line 71  Line 72 
72      if (x$Vectorized)      if (x$Vectorized)
73          tdl <- mapply(function(x, id) readerControl$reader(x, readerControl$language, id),          tdl <- mapply(function(x, id) readerControl$reader(x, readerControl$language, id),
74                        pGetElem(x),                        pGetElem(x),
75                        id = as.character(seq_len(x$Length)),                        id = if (is.null(x$Names)) as.character(seq_len(x$Length)) else x$Names,
76                        SIMPLIFY = FALSE)                        SIMPLIFY = FALSE)
77      else {      else {
78          counter <- 1          counter <- 1
79          while (!eoi(x)) {          while (!eoi(x)) {
80              x <- stepNext(x)              x <- stepNext(x)
81              elem <- getElem(x)              elem <- getElem(x)
82              doc <- readerControl$reader(elem, readerControl$language, as.character(counter))              doc <- readerControl$reader(elem, readerControl$language, if (is.null(x$Names)) as.character(counter) else x$Names[counter])
83              if (x$Length > 0)              if (x$Length > 0)
84                  tdl[[counter]] <- doc                  tdl[[counter]] <- doc
85              else              else
# Line 86  Line 87 
87              counter <- counter + 1              counter <- counter + 1
88          }          }
89      }      }
90        names(tdl) <- x$Names
91      df <- data.frame(MetaID = rep(0, length(tdl)), stringsAsFactors = FALSE)      df <- data.frame(MetaID = rep(0, length(tdl)), stringsAsFactors = FALSE)
92      .VCorpus(tdl, .MetaDataNode(), df)      .VCorpus(tdl, .MetaDataNode(), df)
93  }  }
# Line 108  Line 109 
109      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])
110      counter <- 1      counter <- 1
111      for (id in unclass(x)[i]) {      for (id in unclass(x)[i]) {
112          if (identical(length(value), 1)) db[[id]] <- value          if (identical(length(value), 1L)) db[[id]] <- value
113          else db[[id]] <- value[[counter]]          else db[[id]] <- value[[counter]]
114          counter <- counter + 1          counter <- counter + 1
115      }      }
116      x      x
117  }  }
118    
119    .map_name_index <- function(x, i) {
120        if (is.character(i)) {
121            if (is.null(names(x)))
122                match(i, meta(x, "ID", type = "local"))
123            else
124                match(i, names(x))
125        }
126        i
127    }
128    
129  `[[.PCorpus` <-  function(x, i) {  `[[.PCorpus` <-  function(x, i) {
130        i <- .map_name_index(x, i)
131      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])
132      filehash::dbFetch(db, NextMethod("[["))      filehash::dbFetch(db, NextMethod("[["))
133  }  }
134  `[[.VCorpus` <-  function(x, i) {  `[[.VCorpus` <-  function(x, i) {
135        i <- .map_name_index(x, i)
136      lazyTmMap <- meta(x, tag = "lazyTmMap", type = "corpus")      lazyTmMap <- meta(x, tag = "lazyTmMap", type = "corpus")
137      if (!is.null(lazyTmMap))      if (!is.null(lazyTmMap))
138          .Call("copyCorpus", x, materialize(x, i))          .Call("copyCorpus", x, materialize(x, i))
# Line 127  Line 140 
140  }  }
141    
142  `[[<-.PCorpus` <-  function(x, i, value) {  `[[<-.PCorpus` <-  function(x, i, value) {
143        i <- .map_name_index(x, i)
144      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])
145      index <- unclass(x)[[i]]      index <- unclass(x)[[i]]
146      db[[index]] <- value      db[[index]] <- value
147      x      x
148  }  }
149  `[[<-.VCorpus` <-  function(x, i, value) {  `[[<-.VCorpus` <-  function(x, i, value) {
150        i <- .map_name_index(x, i)
151      # Mark new objects as not active for lazy mapping      # Mark new objects as not active for lazy mapping
152      lazyTmMap <- meta(x, tag = "lazyTmMap", type = "corpus")      lazyTmMap <- meta(x, tag = "lazyTmMap", type = "corpus")
153      if (!is.null(lazyTmMap)) {      if (!is.null(lazyTmMap)) {
# Line 224  Line 239 
239  {  {
240      args <- list(...)      args <- list(...)
241    
242      if (identical(length(args), 0))      if (identical(length(args), 0L))
243          return(x)          return(x)
244    
245      if (!all(unlist(lapply(args, inherits, class(x)))))      if (!all(unlist(lapply(args, inherits, class(x)))))
# Line 233  Line 248 
248      if (inherits(x, "PCorpus"))      if (inherits(x, "PCorpus"))
249          stop("concatenation of corpora with underlying databases is not supported")          stop("concatenation of corpora with underlying databases is not supported")
250    
251      Reduce(c2, base::c(list(x), args))      l <- base::c(list(x), args)
252        if (recursive)
253            Reduce(c2, l)
254        else {
255            l <- do.call("c", lapply(l, unclass))
256            .VCorpus(l,
257                     cmeta = .MetaDataNode(),
258                     dmeta = data.frame(MetaID = rep(0, length(l)), stringsAsFactors = FALSE))
259        }
260  }  }
261    
262  c.TextDocument <- function(x, ..., recursive = FALSE) {  c.TextDocument <- function(x, ..., recursive = FALSE) {
263      args <- list(...)      args <- list(...)
264    
265      if (identical(length(args), 0))      if (identical(length(args), 0L))
266          return(x)          return(x)
267    
268      if (!all(unlist(lapply(args, inherits, class(x)))))      if (!all(unlist(lapply(args, inherits, class(x)))))

Legend:
Removed from v.1021  
changed lines
  Added in v.1108

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge