SCM

SCM Repository

[tm] Diff of /pkg/R/corpus.R
ViewVC logotype

Diff of /pkg/R/corpus.R

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1025, Fri Dec 11 08:56:22 2009 UTC revision 1108, Fri Oct 22 18:32:47 2010 UTC
# Line 10  Line 10 
10  DBControl <- function(x) attr(x, "DBControl")  DBControl <- function(x) attr(x, "DBControl")
11    
12  PCorpus <- function(x,  PCorpus <- function(x,
13                      readerControl = list(reader = x$DefaultReader, language = "eng"),                      readerControl = list(reader = x$DefaultReader, language = "en"),
14                      dbControl = list(dbName = "", dbType = "DB1"),                      dbControl = list(dbName = "", dbType = "DB1"),
15                      ...) {                      ...) {
16      readerControl <- prepareReader(readerControl, x$DefaultReader, ...)      readerControl <- prepareReader(readerControl, x$DefaultReader, ...)
# Line 29  Line 29 
29      while (!eoi(x)) {      while (!eoi(x)) {
30          x <- stepNext(x)          x <- stepNext(x)
31          elem <- getElem(x)          elem <- getElem(x)
32          doc <- readerControl$reader(elem, readerControl$language, as.character(counter))          doc <- readerControl$reader(elem, readerControl$language, if (is.null(x$Names)) as.character(counter) else x$Names[counter])
33          filehash::dbInsert(db, ID(doc), doc)          filehash::dbInsert(db, ID(doc), doc)
34          if (x$Length > 0) tdl[[counter]] <- ID(doc)          if (x$Length > 0) tdl[[counter]] <- ID(doc)
35          else tdl <- c(tdl, ID(doc))          else tdl <- c(tdl, ID(doc))
36          counter <- counter + 1          counter <- counter + 1
37      }      }
38        names(tdl) <- x$Names
39    
40      df <- data.frame(MetaID = rep(0, length(tdl)), stringsAsFactors = FALSE)      df <- data.frame(MetaID = rep(0, length(tdl)), stringsAsFactors = FALSE)
41      filehash::dbInsert(db, "DMetaData", df)      filehash::dbInsert(db, "DMetaData", df)
# Line 58  Line 59 
59    
60  # The "..." are additional arguments for the FunctionGenerator reader  # The "..." are additional arguments for the FunctionGenerator reader
61  VCorpus <- Corpus <- function(x,  VCorpus <- Corpus <- function(x,
62                      readerControl = list(reader = x$DefaultReader, language = "eng"),                                readerControl = list(reader = x$DefaultReader, language = "en"),
63                      ...) {                      ...) {
64      readerControl <- prepareReader(readerControl, x$DefaultReader, ...)      readerControl <- prepareReader(readerControl, x$DefaultReader, ...)
65    
# Line 71  Line 72 
72      if (x$Vectorized)      if (x$Vectorized)
73          tdl <- mapply(function(x, id) readerControl$reader(x, readerControl$language, id),          tdl <- mapply(function(x, id) readerControl$reader(x, readerControl$language, id),
74                        pGetElem(x),                        pGetElem(x),
75                        id = as.character(seq_len(x$Length)),                        id = if (is.null(x$Names)) as.character(seq_len(x$Length)) else x$Names,
76                        SIMPLIFY = FALSE)                        SIMPLIFY = FALSE)
77      else {      else {
78          counter <- 1          counter <- 1
79          while (!eoi(x)) {          while (!eoi(x)) {
80              x <- stepNext(x)              x <- stepNext(x)
81              elem <- getElem(x)              elem <- getElem(x)
82              doc <- readerControl$reader(elem, readerControl$language, as.character(counter))              doc <- readerControl$reader(elem, readerControl$language, if (is.null(x$Names)) as.character(counter) else x$Names[counter])
83              if (x$Length > 0)              if (x$Length > 0)
84                  tdl[[counter]] <- doc                  tdl[[counter]] <- doc
85              else              else
# Line 86  Line 87 
87              counter <- counter + 1              counter <- counter + 1
88          }          }
89      }      }
90        names(tdl) <- x$Names
91      df <- data.frame(MetaID = rep(0, length(tdl)), stringsAsFactors = FALSE)      df <- data.frame(MetaID = rep(0, length(tdl)), stringsAsFactors = FALSE)
92      .VCorpus(tdl, .MetaDataNode(), df)      .VCorpus(tdl, .MetaDataNode(), df)
93  }  }
# Line 115  Line 116 
116      x      x
117  }  }
118    
119    .map_name_index <- function(x, i) {
120        if (is.character(i)) {
121            if (is.null(names(x)))
122                match(i, meta(x, "ID", type = "local"))
123            else
124                match(i, names(x))
125        }
126        i
127    }
128    
129  `[[.PCorpus` <-  function(x, i) {  `[[.PCorpus` <-  function(x, i) {
130        i <- .map_name_index(x, i)
131      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])
132      filehash::dbFetch(db, NextMethod("[["))      filehash::dbFetch(db, NextMethod("[["))
133  }  }
134  `[[.VCorpus` <-  function(x, i) {  `[[.VCorpus` <-  function(x, i) {
135        i <- .map_name_index(x, i)
136      lazyTmMap <- meta(x, tag = "lazyTmMap", type = "corpus")      lazyTmMap <- meta(x, tag = "lazyTmMap", type = "corpus")
137      if (!is.null(lazyTmMap))      if (!is.null(lazyTmMap))
138          .Call("copyCorpus", x, materialize(x, i))          .Call("copyCorpus", x, materialize(x, i))
# Line 127  Line 140 
140  }  }
141    
142  `[[<-.PCorpus` <-  function(x, i, value) {  `[[<-.PCorpus` <-  function(x, i, value) {
143        i <- .map_name_index(x, i)
144      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])      db <- filehash::dbInit(DBControl(x)[["dbName"]], DBControl(x)[["dbType"]])
145      index <- unclass(x)[[i]]      index <- unclass(x)[[i]]
146      db[[index]] <- value      db[[index]] <- value
147      x      x
148  }  }
149  `[[<-.VCorpus` <-  function(x, i, value) {  `[[<-.VCorpus` <-  function(x, i, value) {
150        i <- .map_name_index(x, i)
151      # Mark new objects as not active for lazy mapping      # Mark new objects as not active for lazy mapping
152      lazyTmMap <- meta(x, tag = "lazyTmMap", type = "corpus")      lazyTmMap <- meta(x, tag = "lazyTmMap", type = "corpus")
153      if (!is.null(lazyTmMap)) {      if (!is.null(lazyTmMap)) {
# Line 233  Line 248 
248      if (inherits(x, "PCorpus"))      if (inherits(x, "PCorpus"))
249          stop("concatenation of corpora with underlying databases is not supported")          stop("concatenation of corpora with underlying databases is not supported")
250    
251      Reduce(c2, base::c(list(x), args))      l <- base::c(list(x), args)
252        if (recursive)
253            Reduce(c2, l)
254        else {
255            l <- do.call("c", lapply(l, unclass))
256            .VCorpus(l,
257                     cmeta = .MetaDataNode(),
258                     dmeta = data.frame(MetaID = rep(0, length(l)), stringsAsFactors = FALSE))
259        }
260  }  }
261    
262  c.TextDocument <- function(x, ..., recursive = FALSE) {  c.TextDocument <- function(x, ..., recursive = FALSE) {

Legend:
Removed from v.1025  
changed lines
  Added in v.1108

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge