SCM

SCM Repository

[tm] Diff of /pkg/R/corpus.R
ViewVC logotype

Diff of /pkg/R/corpus.R

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 950, Thu May 14 15:17:18 2009 UTC revision 958, Sat Jun 13 06:06:42 2009 UTC
# Line 1  Line 1 
1  # Author: Ingo Feinerer  # Author: Ingo Feinerer
2    
3  FCorpus <- function(object, readerControl = list(language = "eng")) {  prepareReader <- function(readerControl, defaultReader = NULL, ...) {
4        if (is.null(readerControl$reader))
5            readerControl$reader <- defaultReader
6        if (is(readerControl$reader, "FunctionGenerator"))
7            readerControl$reader <- readerControl$reader(...)
8      if (is.null(readerControl$language))      if (is.null(readerControl$language))
9          readerControl$language <- "eng"          readerControl$language <- "eng"
10        readerControl
11    }
12    
13    FCorpus <- function(object, readerControl = list(language = "eng")) {
14        readerControl <- prepareReader(readerControl, object@DefaultReader, ...)
15    
16      if (!object@Vectorized)      if (!object@Vectorized)
17          stop("Source is not vectorized")          stop("Source is not vectorized")
# Line 19  Line 28 
28                      readerControl = list(reader = object@DefaultReader, language = "eng"),                      readerControl = list(reader = object@DefaultReader, language = "eng"),
29                      dbControl = list(dbName = "", dbType = "DB1"),                      dbControl = list(dbName = "", dbType = "DB1"),
30                      ...) {                      ...) {
31      if (is.null(readerControl$reader))      readerControl <- prepareReader(readerControl, object@DefaultReader, ...)
         readerControl$reader <- object@DefaultReader  
     if (is(readerControl$reader, "FunctionGenerator"))  
         readerControl$reader <- readerControl$reader(...)  
     if (is.null(readerControl$language))  
         readerControl$language <- "eng"  
32    
33      if (!filehash::dbCreate(dbControl$dbName, dbControl$dbType))      if (!filehash::dbCreate(dbControl$dbName, dbControl$dbType))
34          stop("error in creating database")          stop("error in creating database")
# Line 63  Line 67 
67  SCorpus <- Corpus <- function(object,  SCorpus <- Corpus <- function(object,
68                      readerControl = list(reader = object@DefaultReader, language = "eng"),                      readerControl = list(reader = object@DefaultReader, language = "eng"),
69                      ...) {                      ...) {
70      if (is.null(readerControl$reader))      readerControl <- prepareReader(readerControl, object@DefaultReader, ...)
         readerControl$reader <- object@DefaultReader  
     if (is(readerControl$reader, "FunctionGenerator"))  
         readerControl$reader <- readerControl$reader(...)  
     if (is.null(readerControl$language))  
         readerControl$language <- "eng"  
71    
72      # Allocate memory in advance if length is known      # Allocate memory in advance if length is known
73      tdl <- if (object@Length > 0)      tdl <- if (object@Length > 0)
# Line 111  Line 110 
110                if (lazy)                if (lazy)
111                    warning("lazy mapping is deactivated")                    warning("lazy mapping is deactivated")
112    
113                lapply(object, FUN, ..., DMetaData = data.frame())                new("FCorpus", .Data = lapply(object, FUN, ..., DMetaData = data.frame()))
114            })            })
115  setMethod("tmMap",  setMethod("tmMap",
116            signature(object = "SCorpus", FUN = "function"),            signature(object = "SCorpus", FUN = "function"),
# Line 277  Line 276 
276  }  }
277    
278  setMethod("[",  setMethod("[",
279              signature(x = "FCorpus", i = "ANY", j = "ANY", drop = "ANY"),
280              function(x, i, j, ... , drop) {
281                  if (missing(i)) return(x)
282    
283                  x@.Data <- x@.Data[i, ..., drop = FALSE]
284                  x
285              })
286    setMethod("[",
287            signature(x = "PCorpus", i = "ANY", j = "ANY", drop = "ANY"),            signature(x = "PCorpus", i = "ANY", j = "ANY", drop = "ANY"),
288            function(x, i, j, ... , drop) {            function(x, i, j, ... , drop) {
289                if (missing(i)) return(x)                if (missing(i)) return(x)
# Line 287  Line 294 
294                else x@DMetaData[[1 , "subset"]] <- index[i]                else x@DMetaData[[1 , "subset"]] <- index[i]
295                x                x
296            })            })
   
297  setMethod("[",  setMethod("[",
298            signature(x = "SCorpus", i = "ANY", j = "ANY", drop = "ANY"),            signature(x = "SCorpus", i = "ANY", j = "ANY", drop = "ANY"),
299            function(x, i, j, ... , drop) {            function(x, i, j, ... , drop) {
# Line 381  Line 387 
387          return(object)          return(object)
388      }      }
389    
390      return(list(root = set_id(object), left.mapping = left.mapping, right.mapping = mapping))      list(root = set_id(object), left.mapping = left.mapping, right.mapping = mapping)
391  }  }
392    
 # TODO: Implement concatenation for other corpus types  
393  setMethod("c",  setMethod("c",
394            signature(x = "Corpus"),            signature(x = "Corpus"),
395            function(x, ..., meta = list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME")), recursive = TRUE) {            function(x, ..., meta = list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME")), recursive = FALSE) {
396                args <- list(...)                args <- list(...)
397                if (length(args) == 0)                if (identical(length(args), 0)) return(x)
398                    return(x)  
399                  if (!all(sapply(args, inherits, class(x))))
400                      stop("not all arguments are of the same corpus type")
401    
402                if (!all(sapply(args, inherits, "SCorpus")))                if (inherits(x, "PCorpus"))
403                    stop("not all arguments are standard corpora")                    stop("concatenation of corpora with underlying databases is not supported")
404    
405                Reduce(c2, base::c(list(x), args))                Reduce(c2, base::c(list(x), args))
406            })            })
407    
408  setGeneric("c2", function(x, y, ..., meta = list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME")), recursive = TRUE) standardGeneric("c2"))  setGeneric("c2", function(x, y, ..., meta = list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME"))) standardGeneric("c2"))
409  setMethod("c2",  setMethod("c2", signature(x = "FCorpus", y = "FCorpus"),
410            signature(x = "SCorpus", y = "SCorpus"),            function(x, y, ..., meta = list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME"))) {
411            function(x, y, ..., meta = list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME")), recursive = TRUE) {                new("FCorpus", .Data = c(as(x, "list"), as(y, "list")))
412              })
413    setMethod("c2", signature(x = "SCorpus", y = "SCorpus"),
414              function(x, y, ..., meta = list(merge_date = as.POSIXlt(Sys.time(), tz = "GMT"), merger = Sys.getenv("LOGNAME"))) {
415                object <- x                object <- x
416                # Concatenate data slots                # Concatenate data slots
417                object@.Data <- c(as(x, "list"), as(y, "list"))                object@.Data <- c(as(x, "list"), as(y, "list"))
# Line 448  Line 458 
458                y.dmeta.aug <- cbind(DMetaData(y), na.matrix)                y.dmeta.aug <- cbind(DMetaData(y), na.matrix)
459                object@DMetaData <- rbind(x.dmeta.aug, y.dmeta.aug)                object@DMetaData <- rbind(x.dmeta.aug, y.dmeta.aug)
460    
461                return(object)                object
462            })            })
463    
464  setMethod("c",  setMethod("c",
465            signature(x = "TextDocument"),            signature(x = "TextDocument"),
466            function(x, ..., recursive = TRUE){            function(x, ..., recursive = FALSE){
467                args <- list(...)                args <- list(...)
468                if (identical(length(args), 0)) return(x)                if (identical(length(args), 0)) return(x)
469    
# Line 549  Line 559 
559                        NodeID = 0,                        NodeID = 0,
560                        MetaData = list(create_date = as.POSIXlt(Sys.time(), tz = "GMT"), creator = Sys.getenv("LOGNAME")),                        MetaData = list(create_date = as.POSIXlt(Sys.time(), tz = "GMT"), creator = Sys.getenv("LOGNAME")),
561                        children = list())                        children = list())
562      data <- list()      data <- vector("list", length(from))
563      counter <- 1      counter <- 1
564      for (f in from) {      for (f in from) {
565          doc <- new("PlainTextDocument",          data[[counter]] <- new("PlainTextDocument",
566                     .Data = f,                     .Data = f,
567                     Author = "", DateTimeStamp = as.POSIXlt(Sys.time(), tz = "GMT"),                                 DateTimeStamp = as.POSIXlt(Sys.time(), tz = "GMT"),
568                     Description = "", ID = as.character(counter),                                 ID = as.character(counter),
569                     Origin = "", Heading = "", Language = "eng")                                 Language = "eng")
         data <- c(data, list(doc))  
570          counter <- counter + 1          counter <- counter + 1
571      }      }
572      new("SCorpus", .Data = data,      new("SCorpus", .Data = data,

Legend:
Removed from v.950  
changed lines
  Added in v.958

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge