SCM

SCM Repository

[tm] Diff of /pkg/R/corpus.R
ViewVC logotype

Diff of /pkg/R/corpus.R

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1409, Fri Feb 27 16:10:18 2015 UTC revision 1467, Sun Jan 22 18:06:19 2017 UTC
# Line 1  Line 1 
1  # Author: Ingo Feinerer  # Author: Ingo Feinerer
2    
3    Corpus <-
4    function(x, readerControl = list(reader = reader(x), language = "en"))
5    {
6        stopifnot(inherits(x, "Source"))
7    
8        readerControl <- prepareReader(readerControl, reader(x))
9    
10        if ((inherits(x, "DirSource") || inherits(x, "VectorSource")) &&
11            identical(readerControl$reader, readPlain))
12            SimpleCorpus(x, readerControl)
13        else
14            VCorpus(x, readerControl)
15    }
16    
17  PCorpus <-  PCorpus <-
18  function(x,  function(x,
19           readerControl = list(reader = reader(x), language = "en"),           readerControl = list(reader = reader(x), language = "en"),
# Line 36  Line 50 
50      p      p
51  }  }
52    
53  Corpus <-  SimpleCorpus <-
54    function(x, control = list(language = "en"))
55    {
56        stopifnot(inherits(x, "Source"))
57    
58        if (!is.null(control$reader) && !identical(control$reader, readPlain))
59            warning("custom reader is ignored")
60    
61        content <- if (inherits(x, "VectorSource")) {
62            if (is.character(x$content)) x$content else as.character(x$content)
63        } else if (inherits(x, "DirSource")) {
64            setNames(as.character(
65                       lapply(x$filelist,
66                              function(f) paste(readContent(f, x$encoding, "text"),
67                                                collapse = "\n"))
68                       ),
69                     basename(x$filelist))
70        } else
71            stop("unsupported source type")
72        s <- list(content = content,
73                  meta = CorpusMeta(language = control$language),
74                  dmeta = data.frame(row.names = seq_along(x)))
75        class(s) <- c("SimpleCorpus", "Corpus")
76        s
77    }
78    
79  VCorpus <-  VCorpus <-
80  function(x, readerControl = list(reader = reader(x), language = "en"))  function(x, readerControl = list(reader = reader(x), language = "en"))
81  {  {
# Line 71  Line 110 
110  }  }
111    
112  `[.PCorpus` <-  `[.PCorpus` <-
113    `[.SimpleCorpus` <-
114  function(x, i)  function(x, i)
115  {  {
116      if (!missing(i)) {      if (!missing(i)) {
# Line 79  Line 119 
119      }      }
120      x      x
121  }  }
   
122  `[.VCorpus` <-  `[.VCorpus` <-
123  function(x, i)  function(x, i)
124  {  {
# Line 108  Line 147 
147      db <- filehash::dbInit(x$dbcontrol[["dbName"]], x$dbcontrol[["dbType"]])      db <- filehash::dbInit(x$dbcontrol[["dbName"]], x$dbcontrol[["dbType"]])
148      filehash::dbFetch(db, x$content[[i]])      filehash::dbFetch(db, x$content[[i]])
149  }  }
150    `[[.SimpleCorpus` <-
151    function(x, i)
152    {
153        i <- .map_name_index(x, i)
154        n <- names(x$content)
155        PlainTextDocument(x$content[[i]],
156                          id = if (is.null(n)) i else n[i],
157                          language = meta(x, "language"))
158    }
159  `[[.VCorpus` <-  `[[.VCorpus` <-
160  function(x, i)  function(x, i)
161  {  {
162      i <- .map_name_index(x, i)      i <- .map_name_index(x, i)
163      if (!is.null(x$lazy))      if (!is.null(x$lazy))
164          .Call(copyCorpus, x, materialize(x, i))          .Call(tm_copyCorpus, x, materialize(x, i))
165      x$content[[i]]      x$content[[i]]
166  }  }
167    
# Line 138  Line 186 
186    
187  as.list.PCorpus <- as.list.VCorpus <-  as.list.PCorpus <- as.list.VCorpus <-
188  function(x, ...)  function(x, ...)
189      content(x)      setNames(content(x), as.character(lapply(content(x), meta, "id")))
190    
191    as.list.SimpleCorpus <-
192    function(x, ...)
193        as.list(content(x))
194    
195  as.VCorpus <-  as.VCorpus <-
196  function(x)  function(x)
# Line 191  Line 243 
243  function(x)  function(x)
244  {  {
245      if (!is.null(x$lazy))      if (!is.null(x$lazy))
246          .Call(copyCorpus, x, materialize(x))          .Call(tm_copyCorpus, x, materialize(x))
247      x$content      x$content
248  }  }
249    
250    content.SimpleCorpus <-
251    function(x)
252        x$content
253    
254  content.PCorpus <-  content.PCorpus <-
255  function(x)  function(x)
256  {  {
# Line 205  Line 261 
261  inspect <-  inspect <-
262  function(x)  function(x)
263      UseMethod("inspect", x)      UseMethod("inspect", x)
264  inspect.PCorpus <- inspect.VCorpus <-  inspect.PCorpus <-
265    inspect.SimpleCorpus <-
266    inspect.VCorpus <-
267  function(x)  function(x)
268  {  {
269      print(x)      print(x)
# Line 214  Line 272 
272      invisible(x)      invisible(x)
273  }  }
274    
275  length.PCorpus <- length.VCorpus <-  length.PCorpus <-
276    length.SimpleCorpus <-
277    length.VCorpus <-
278  function(x)  function(x)
279      length(x$content)      length(x$content)
280    
281  names.PCorpus <- names.VCorpus <-  names.PCorpus <-
282    names.SimpleCorpus <-
283    names.VCorpus <-
284  function(x)  function(x)
285      as.character(meta(x, "id", "local"))      as.character(meta(x, "id", "local"))
286    
# Line 229  Line 291 
291      x      x
292  }  }
293    
294  print.PCorpus <- print.VCorpus <-  format.PCorpus <-
295    format.SimpleCorpus <-
296    format.VCorpus <-
297  function(x, ...)  function(x, ...)
298  {  {
299      writeLines(sprintf("<<%s (documents: %d, metadata (corpus/indexed): %d/%d)>>",      c(sprintf("<<%s>>", class(x)[1L]),
300                         class(x)[1],        sprintf("Metadata:  corpus specific: %d, document level (indexed): %d",
                        length(x),  
301                         length(meta(x, type = "corpus")),                         length(meta(x, type = "corpus")),
302                         ncol(meta(x, type = "indexed"))))                ncol(meta(x, type = "indexed"))),
303      invisible(x)        sprintf("Content:  documents: %d", length(x)))
304  }  }
305    
306  writeCorpus <-  writeCorpus <-

Legend:
Removed from v.1409  
changed lines
  Added in v.1467

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge