SCM

SCM Repository

[tm] Diff of /pkg/R/corpus.R
ViewVC logotype

Diff of /pkg/R/corpus.R

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1419, Sat May 2 17:23:47 2015 UTC revision 1437, Wed Jul 13 19:23:49 2016 UTC
# Line 36  Line 36 
36      p      p
37  }  }
38    
39    SimpleCorpus <-
40    function(x, control = list(language = "en"))
41    {
42        stopifnot(inherits(x, "Source"))
43    
44        if (!is.null(control$reader))
45            warning("custom reader is ignored")
46    
47        content <- if (inherits(x, "VectorSource"))
48            x$content
49        else if (inherits(x, "DirSource")) {
50            setNames(as.character(
51                       lapply(x$filelist,
52                              function(f) paste(readContent(f, x$encoding, "text"),
53                                                collapse = "\n"))
54                       ),
55                     basename(x$filelist))
56        } else
57            stop("unsupported source type")
58        s <- list(content = content,
59                  meta = CorpusMeta(language = control$language),
60                  dmeta = data.frame(row.names = seq_along(x)))
61        class(s) <- c("SimpleCorpus", "Corpus")
62        s
63    }
64    
65  Corpus <-  Corpus <-
66  VCorpus <-  VCorpus <-
67  function(x, readerControl = list(reader = reader(x), language = "en"))  function(x, readerControl = list(reader = reader(x), language = "en"))
# Line 71  Line 97 
97  }  }
98    
99  `[.PCorpus` <-  `[.PCorpus` <-
100    `[.SimpleCorpus` <-
101  function(x, i)  function(x, i)
102  {  {
103      if (!missing(i)) {      if (!missing(i)) {
# Line 79  Line 106 
106      }      }
107      x      x
108  }  }
   
109  `[.VCorpus` <-  `[.VCorpus` <-
110  function(x, i)  function(x, i)
111  {  {
# Line 108  Line 134 
134      db <- filehash::dbInit(x$dbcontrol[["dbName"]], x$dbcontrol[["dbType"]])      db <- filehash::dbInit(x$dbcontrol[["dbName"]], x$dbcontrol[["dbType"]])
135      filehash::dbFetch(db, x$content[[i]])      filehash::dbFetch(db, x$content[[i]])
136  }  }
137    `[[.SimpleCorpus` <-
138    function(x, i)
139    {
140        i <- .map_name_index(x, i)
141        n <- names(x$content)
142        PlainTextDocument(x$content[[i]],
143                          id = if (is.null(n)) i else n[i],
144                          language = meta(x, "language"))
145    }
146  `[[.VCorpus` <-  `[[.VCorpus` <-
147  function(x, i)  function(x, i)
148  {  {
# Line 140  Line 175 
175  function(x, ...)  function(x, ...)
176      setNames(content(x), as.character(lapply(content(x), meta, "id")))      setNames(content(x), as.character(lapply(content(x), meta, "id")))
177    
178    as.list.SimpleCorpus <-
179    function(x, ...)
180        as.list(content(x))
181    
182  as.VCorpus <-  as.VCorpus <-
183  function(x)  function(x)
184      UseMethod("as.VCorpus")      UseMethod("as.VCorpus")
# Line 195  Line 234 
234      x$content      x$content
235  }  }
236    
237    content.SimpleCorpus <-
238    function(x)
239        x$content
240    
241  content.PCorpus <-  content.PCorpus <-
242  function(x)  function(x)
243  {  {
# Line 205  Line 248 
248  inspect <-  inspect <-
249  function(x)  function(x)
250      UseMethod("inspect", x)      UseMethod("inspect", x)
251  inspect.PCorpus <- inspect.VCorpus <-  inspect.PCorpus <-
252    inspect.SimpleCorpus <-
253    inspect.VCorpus <-
254  function(x)  function(x)
255  {  {
256      print(x)      print(x)
# Line 214  Line 259 
259      invisible(x)      invisible(x)
260  }  }
261    
262  length.PCorpus <- length.VCorpus <-  length.PCorpus <-
263    length.SimpleCorpus <-
264    length.VCorpus <-
265  function(x)  function(x)
266      length(x$content)      length(x$content)
267    
268  names.PCorpus <- names.VCorpus <-  names.PCorpus <-
269    names.SimpleCorpus <-
270    names.VCorpus <-
271  function(x)  function(x)
272      as.character(meta(x, "id", "local"))      as.character(meta(x, "id", "local"))
273    
# Line 229  Line 278 
278      x      x
279  }  }
280    
281  format.PCorpus <- format.VCorpus <-  format.PCorpus <-
282    format.SimpleCorpus <-
283    format.VCorpus <-
284  function(x, ...)  function(x, ...)
285  {  {
286      c(sprintf("<<%s>>", class(x)[1L]),      c(sprintf("<<%s>>", class(x)[1L]),

Legend:
Removed from v.1419  
changed lines
  Added in v.1437

R-Forge@R-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business University of Wisconsin - Madison Powered By FusionForge