SCM

SCM Repository

[tm] Diff of /pkg/R/corpus.R
ViewVC logotype

Diff of /pkg/R/corpus.R

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 957, Fri Jun 12 12:47:57 2009 UTC revision 958, Sat Jun 13 06:06:42 2009 UTC
# Line 1  Line 1 
1  # Author: Ingo Feinerer  # Author: Ingo Feinerer
2    
3  FCorpus <- function(object, readerControl = list(language = "eng")) {  prepareReader <- function(readerControl, defaultReader = NULL, ...) {
4        if (is.null(readerControl$reader))
5            readerControl$reader <- defaultReader
6        if (is(readerControl$reader, "FunctionGenerator"))
7            readerControl$reader <- readerControl$reader(...)
8      if (is.null(readerControl$language))      if (is.null(readerControl$language))
9          readerControl$language <- "eng"          readerControl$language <- "eng"
10        readerControl
11    }
12    
13    FCorpus <- function(object, readerControl = list(language = "eng")) {
14        readerControl <- prepareReader(readerControl, object@DefaultReader, ...)
15    
16      if (!object@Vectorized)      if (!object@Vectorized)
17          stop("Source is not vectorized")          stop("Source is not vectorized")
# Line 19  Line 28 
28                      readerControl = list(reader = object@DefaultReader, language = "eng"),                      readerControl = list(reader = object@DefaultReader, language = "eng"),
29                      dbControl = list(dbName = "", dbType = "DB1"),                      dbControl = list(dbName = "", dbType = "DB1"),
30                      ...) {                      ...) {
31      if (is.null(readerControl$reader))      readerControl <- prepareReader(readerControl, object@DefaultReader, ...)
         readerControl$reader <- object@DefaultReader  
     if (is(readerControl$reader, "FunctionGenerator"))  
         readerControl$reader <- readerControl$reader(...)  
     if (is.null(readerControl$language))  
         readerControl$language <- "eng"  
32    
33      if (!filehash::dbCreate(dbControl$dbName, dbControl$dbType))      if (!filehash::dbCreate(dbControl$dbName, dbControl$dbType))
34          stop("error in creating database")          stop("error in creating database")
# Line 63  Line 67 
67  SCorpus <- Corpus <- function(object,  SCorpus <- Corpus <- function(object,
68                      readerControl = list(reader = object@DefaultReader, language = "eng"),                      readerControl = list(reader = object@DefaultReader, language = "eng"),
69                      ...) {                      ...) {
70      if (is.null(readerControl$reader))      readerControl <- prepareReader(readerControl, object@DefaultReader, ...)
         readerControl$reader <- object@DefaultReader  
     if (is(readerControl$reader, "FunctionGenerator"))  
         readerControl$reader <- readerControl$reader(...)  
     if (is.null(readerControl$language))  
         readerControl$language <- "eng"  
71    
72      # Allocate memory in advance if length is known      # Allocate memory in advance if length is known
73      tdl <- if (object@Length > 0)      tdl <- if (object@Length > 0)
# Line 391  Line 390 
390      list(root = set_id(object), left.mapping = left.mapping, right.mapping = mapping)      list(root = set_id(object), left.mapping = left.mapping, right.mapping = mapping)
391  }  }
392    
 # TODO: Implement concatenation for other corpus types  
393  setMethod("c",  setMethod("c",
394            signature(x = "Corpus"),            signature(x = "Corpus"),
395            function(x, ..., meta = list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME")), recursive = TRUE) {            function(x, ..., meta = list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME")), recursive = FALSE) {
396                args <- list(...)                args <- list(...)
397                if (identical(length(args), 0)) return(x)                if (identical(length(args), 0)) return(x)
398    
399                if (!all(sapply(args, inherits, class(x))))                if (!all(sapply(args, inherits, class(x))))
400                    stop("not all arguments are of the same class")                    stop("not all arguments are of the same corpus type")
401    
402                  if (inherits(x, "PCorpus"))
403                      stop("concatenation of corpora with underlying databases is not supported")
404    
405                Reduce(c2, base::c(list(x), args))                Reduce(c2, base::c(list(x), args))
406            })            })
407    
408  setGeneric("c2", function(x, y, ..., meta = list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME")), recursive = TRUE) standardGeneric("c2"))  setGeneric("c2", function(x, y, ..., meta = list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME"))) standardGeneric("c2"))
409  setMethod("c2", signature(x = "FCorpus", y = "FCorpus"),  setMethod("c2", signature(x = "FCorpus", y = "FCorpus"),
410            function(x, y, ..., meta = list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME")), recursive = TRUE) {            function(x, y, ..., meta = list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME"))) {
411                new("FCorpus", .Data = c(as(x, "list"), as(y, "list")))                new("FCorpus", .Data = c(as(x, "list"), as(y, "list")))
412            })            })
413  setMethod("c2", signature(x = "SCorpus", y = "SCorpus"),  setMethod("c2", signature(x = "SCorpus", y = "SCorpus"),
414            function(x, y, ..., meta = list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME")), recursive = TRUE) {            function(x, y, ..., meta = list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME"))) {
415                object <- x                object <- x
416                # Concatenate data slots                # Concatenate data slots
417                object@.Data <- c(as(x, "list"), as(y, "list"))                object@.Data <- c(as(x, "list"), as(y, "list"))
# Line 462  Line 463 
463    
464  setMethod("c",  setMethod("c",
465            signature(x = "TextDocument"),            signature(x = "TextDocument"),
466            function(x, ..., recursive = TRUE){            function(x, ..., recursive = FALSE){
467                args <- list(...)                args <- list(...)
468                if (identical(length(args), 0)) return(x)                if (identical(length(args), 0)) return(x)
469    
# Line 558  Line 559 
559                        NodeID = 0,                        NodeID = 0,
560                        MetaData = list(create_date = as.POSIXlt(Sys.time(), tz = "GMT"), creator = Sys.getenv("LOGNAME")),                        MetaData = list(create_date = as.POSIXlt(Sys.time(), tz = "GMT"), creator = Sys.getenv("LOGNAME")),
561                        children = list())                        children = list())
562      data <- list()      data <- vector("list", length(from))
563      counter <- 1      counter <- 1
564      for (f in from) {      for (f in from) {
565          doc <- new("PlainTextDocument",          data[[counter]] <- new("PlainTextDocument",
566                     .Data = f,                     .Data = f,
567                     Author = "", DateTimeStamp = as.POSIXlt(Sys.time(), tz = "GMT"),                                 DateTimeStamp = as.POSIXlt(Sys.time(), tz = "GMT"),
568                     Description = "", ID = as.character(counter),                                 ID = as.character(counter),
569                     Origin = "", Heading = "", Language = "eng")                                 Language = "eng")
         data <- c(data, list(doc))  
570          counter <- counter + 1          counter <- counter + 1
571      }      }
572      new("SCorpus", .Data = data,      new("SCorpus", .Data = data,

Legend:
Removed from v.957  
changed lines
  Added in v.958

R-Forge@R-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business University of Wisconsin - Madison Powered By FusionForge