SCM

SCM Repository

[tm] Diff of /pkg/R/corpus.R
ViewVC logotype

Diff of /pkg/R/corpus.R

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1021, Tue Nov 17 16:37:22 2009 UTC revision 1114, Fri Nov 26 14:05:54 2010 UTC
# Line 10  Line 10 
10  DBControl <- function(x) attr(x, "DBControl")  DBControl <- function(x) attr(x, "DBControl")
11    
12  PCorpus <- function(x,  PCorpus <- function(x,
13                      readerControl = list(reader = x$DefaultReader, language = "eng"),                      readerControl = list(reader = x$DefaultReader, language = "en"),
14                      dbControl = list(dbName = "", dbType = "DB1"),                      dbControl = list(dbName = "", dbType = "DB1"),
15                      ...) {                      ...) {
16      readerControl <- prepareReader(readerControl, x$DefaultReader, ...)      readerControl <- prepareReader(readerControl, x$DefaultReader, ...)
17    
18        if (is.function(readerControl$init))
19            readerControl$init()
20    
21        if (is.function(readerControl$exit))
22            on.exit(readerControl$exit())
23    
24      if (!filehash::dbCreate(dbControl$dbName, dbControl$dbType))      if (!filehash::dbCreate(dbControl$dbName, dbControl$dbType))
25          stop("error in creating database")          stop("error in creating database")
26      db <- filehash::dbInit(dbControl$dbName, dbControl$dbType)      db <- filehash::dbInit(dbControl$dbName, dbControl$dbType)
# Line 29  Line 35 
35      while (!eoi(x)) {      while (!eoi(x)) {
36          x <- stepNext(x)          x <- stepNext(x)
37          elem <- getElem(x)          elem <- getElem(x)
38          doc <- readerControl$reader(elem, readerControl$language, as.character(counter))          doc <- readerControl$reader(elem, readerControl$language, if (is.null(x$Names)) as.character(counter) else x$Names[counter])
39          filehash::dbInsert(db, ID(doc), doc)          filehash::dbInsert(db, ID(doc), doc)
40          if (x$Length > 0) tdl[[counter]] <- ID(doc)          if (x$Length > 0) tdl[[counter]] <- ID(doc)
41          else tdl <- c(tdl, ID(doc))          else tdl <- c(tdl, ID(doc))
42          counter <- counter + 1          counter <- counter + 1
43      }      }
44        names(tdl) <- x$Names
45    
46      df <- data.frame(MetaID = rep(0, length(tdl)), stringsAsFactors = FALSE)      df <- data.frame(MetaID = rep(0, length(tdl)), stringsAsFactors = FALSE)
47      filehash::dbInsert(db, "DMetaData", df)      filehash::dbInsert(db, "DMetaData", df)
# Line 58  Line 65 
65    
66  # The "..." are additional arguments for the FunctionGenerator reader  # The "..." are additional arguments for the FunctionGenerator reader
67  VCorpus <- Corpus <- function(x,  VCorpus <- Corpus <- function(x,
68                      readerControl = list(reader = x$DefaultReader, language = "eng"),                                readerControl = list(reader = x$DefaultReader, language = "en"),
69                      ...) {                      ...) {
70      readerControl <- prepareReader(readerControl, x$DefaultReader, ...)      readerControl <- prepareReader(readerControl, x$DefaultReader, ...)
71    
72        if (is.function(readerControl$init))
73            readerControl$init()
74    
75        if (is.function(readerControl$exit))
76            on.exit(readerControl$exit())
77    
78      # Allocate memory in advance if length is known      # Allocate memory in advance if length is known
79      tdl <- if (x$Length > 0)      tdl <- if (x$Length > 0)
80          vector("list", as.integer(x$Length))          vector("list", as.integer(x$Length))
# Line 71  Line 84 
84      if (x$Vectorized)      if (x$Vectorized)
85          tdl <- mapply(function(x, id) readerControl$reader(x, readerControl$language, id),          tdl <- mapply(function(x, id) readerControl$reader(x, readerControl$language, id),
86                        pGetElem(x),                        pGetElem(x),
87                        id = as.character(seq_len(x$Length)),                        id = if (is.null(x$Names)) as.character(seq_len(x$Length)) else x$Names,
88                        SIMPLIFY = FALSE)                        SIMPLIFY = FALSE)
89      else {      else {
90          counter <- 1          counter <- 1
91          while (!eoi(x)) {          while (!eoi(x)) {
92              x <- stepNext(x)              x <- stepNext(x)
93              elem <- getElem(x)              elem <- getElem(x)
94              doc <- readerControl$reader(elem, readerControl$language, as.character(counter))              doc <- readerControl$reader(elem, readerControl$language, if (is.null(x$Names)) as.character(counter) else x$Names[counter])
95              if (x$Length > 0)              if (x$Length > 0)
96                  tdl[[counter]] <- doc                  tdl[[counter]] <- doc
97              else              else
# Line 86  Line 99 
99              counter <- counter + 1              counter <- counter + 1
100          }          }
101      }      }
102        names(tdl) <- x$Names
103      df <- data.frame(MetaID = rep(0, length(tdl)), stringsAsFactors = FALSE)      df <- data.frame(MetaID = rep(0, length(tdl)), stringsAsFactors = FALSE)
104      .VCorpus(tdl, .MetaDataNode(), df)      .VCorpus(tdl, .MetaDataNode(), df)
105  }  }
# Line 108  Line 121 
121      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])
122      counter <- 1      counter <- 1
123      for (id in unclass(x)[i]) {      for (id in unclass(x)[i]) {
124          if (identical(length(value), 1)) db[[id]] <- value          if (identical(length(value), 1L)) db[[id]] <- value
125          else db[[id]] <- value[[counter]]          else db[[id]] <- value[[counter]]
126          counter <- counter + 1          counter <- counter + 1
127      }      }
128      x      x
129  }  }
130    
131    .map_name_index <- function(x, i) {
132        if (is.character(i)) {
133            if (is.null(names(x)))
134                match(i, meta(x, "ID", type = "local"))
135            else
136                match(i, names(x))
137        }
138        i
139    }
140    
141  `[[.PCorpus` <-  function(x, i) {  `[[.PCorpus` <-  function(x, i) {
142        i <- .map_name_index(x, i)
143      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])
144      filehash::dbFetch(db, NextMethod("[["))      filehash::dbFetch(db, NextMethod("[["))
145  }  }
146  `[[.VCorpus` <-  function(x, i) {  `[[.VCorpus` <-  function(x, i) {
147        i <- .map_name_index(x, i)
148      lazyTmMap <- meta(x, tag = "lazyTmMap", type = "corpus")      lazyTmMap <- meta(x, tag = "lazyTmMap", type = "corpus")
149      if (!is.null(lazyTmMap))      if (!is.null(lazyTmMap))
150          .Call("copyCorpus", x, materialize(x, i))          .Call("copyCorpus", x, materialize(x, i))
# Line 127  Line 152 
152  }  }
153    
154  `[[<-.PCorpus` <-  function(x, i, value) {  `[[<-.PCorpus` <-  function(x, i, value) {
155        i <- .map_name_index(x, i)
156      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])
157      index <- unclass(x)[[i]]      index <- unclass(x)[[i]]
158      db[[index]] <- value      db[[index]] <- value
159      x      x
160  }  }
161  `[[<-.VCorpus` <-  function(x, i, value) {  `[[<-.VCorpus` <-  function(x, i, value) {
162        i <- .map_name_index(x, i)
163      # Mark new objects as not active for lazy mapping      # Mark new objects as not active for lazy mapping
164      lazyTmMap <- meta(x, tag = "lazyTmMap", type = "corpus")      lazyTmMap <- meta(x, tag = "lazyTmMap", type = "corpus")
165      if (!is.null(lazyTmMap)) {      if (!is.null(lazyTmMap)) {
# Line 224  Line 251 
251  {  {
252      args <- list(...)      args <- list(...)
253    
254      if (identical(length(args), 0))      if (identical(length(args), 0L))
255          return(x)          return(x)
256    
257      if (!all(unlist(lapply(args, inherits, class(x)))))      if (!all(unlist(lapply(args, inherits, class(x)))))
# Line 233  Line 260 
260      if (inherits(x, "PCorpus"))      if (inherits(x, "PCorpus"))
261          stop("concatenation of corpora with underlying databases is not supported")          stop("concatenation of corpora with underlying databases is not supported")
262    
263      Reduce(c2, base::c(list(x), args))      l <- base::c(list(x), args)
264        if (recursive)
265            Reduce(c2, l)
266        else {
267            l <- do.call("c", lapply(l, unclass))
268            .VCorpus(l,
269                     cmeta = .MetaDataNode(),
270                     dmeta = data.frame(MetaID = rep(0, length(l)), stringsAsFactors = FALSE))
271        }
272  }  }
273    
274  c.TextDocument <- function(x, ..., recursive = FALSE) {  c.TextDocument <- function(x, ..., recursive = FALSE) {
275      args <- list(...)      args <- list(...)
276    
277      if (identical(length(args), 0))      if (identical(length(args), 0L))
278          return(x)          return(x)
279    
280      if (!all(unlist(lapply(args, inherits, class(x)))))      if (!all(unlist(lapply(args, inherits, class(x)))))

Legend:
Removed from v.1021  
changed lines
  Added in v.1114

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge