SCM

SCM Repository

[tm] Diff of /pkg/R/corpus.R
ViewVC logotype

Diff of /pkg/R/corpus.R

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1333, Fri Apr 18 10:38:46 2014 UTC revision 1342, Sat Apr 19 17:06:45 2014 UTC
# Line 2  Line 2 
2    
3  PCorpus <-  PCorpus <-
4  function(x,  function(x,
5           readerControl = list(reader = x$defaultreader, language = "en"),           readerControl = list(reader = reader(x), language = "en"),
6           dbControl = list(dbName = "", dbType = "DB1"))           dbControl = list(dbName = "", dbType = "DB1"))
7  {  {
8      stopifnot(inherits(x, "Source"))      stopifnot(inherits(x, "Source"))
9    
10      readerControl <- prepareReader(readerControl, x$defaultreader)      readerControl <- prepareReader(readerControl, reader(x))
11    
12      if (is.function(readerControl$init))      if (is.function(readerControl$init))
13          readerControl$init()          readerControl$init()
# Line 20  Line 20 
20      db <- filehash::dbInit(dbControl$dbName, dbControl$dbType)      db <- filehash::dbInit(dbControl$dbName, dbControl$dbType)
21    
22      # Allocate memory in advance if length is known      # Allocate memory in advance if length is known
23      tdl <- if (x$length > 0) vector("list", as.integer(x$length)) else list()      tdl <- if (length(x) > 0) vector("list", as.integer(length(x))) else list()
24    
25      counter <- 1      counter <- 1
26      while (!eoi(x)) {      while (!eoi(x)) {
27          x <- stepNext(x)          x <- stepNext(x)
28          elem <- getElem(x)          elem <- getElem(x)
29          id <- if (is.null(x$names) || is.na(x$names))          id <- if (is.null(names(x)) || is.na(names(x)))
30              as.character(counter)              as.character(counter)
31          else          else
32              x$names[counter]              names(x)[counter]
33          doc <- readerControl$reader(elem, readerControl$language, id)          doc <- readerControl$reader(elem, readerControl$language, id)
34          filehash::dbInsert(db, meta(doc, "id"), doc)          filehash::dbInsert(db, meta(doc, "id"), doc)
35          if (x$length > 0) tdl[[counter]] <- meta(doc, "id")          tdl[[counter]] <- meta(doc, "id")
         else tdl <- c(tdl, meta(doc, "id"))  
36          counter <- counter + 1          counter <- counter + 1
37      }      }
38      if (!is.null(x$names) && !is.na(x$names))      if (!is.null(names(x)) && !is.na(names(x)))
39          names(tdl) <- x$names          names(tdl) <- names(x)
40    
41      structure(list(content = tdl,      structure(list(content = tdl,
42                     meta = CorpusMeta(),                     meta = CorpusMeta(),
# Line 47  Line 46 
46  }  }
47    
48  VCorpus <-  VCorpus <-
49  function(x, readerControl = list(reader = x$defaultreader, language = "en"))  function(x, readerControl = list(reader = reader(x), language = "en"))
50  {  {
51      stopifnot(inherits(x, "Source"))      stopifnot(inherits(x, "Source"))
52    
53      readerControl <- prepareReader(readerControl, x$defaultreader)      readerControl <- prepareReader(readerControl, reader(x))
54    
55      if (is.function(readerControl$init))      if (is.function(readerControl$init))
56          readerControl$init()          readerControl$init()
# Line 60  Line 59 
59          on.exit(readerControl$exit())          on.exit(readerControl$exit())
60    
61      # Allocate memory in advance if length is known      # Allocate memory in advance if length is known
62      tdl <- if (x$length > 0) vector("list", as.integer(x$length)) else list()      tdl <- if (length(x) > 0) vector("list", as.integer(length(x))) else list()
63    
64      if (x$vectorized)      # Check for parallel element access
65        if (is.function(getS3method("pGetElem", class(x), TRUE)))
66          tdl <- mapply(function(elem, id)          tdl <- mapply(function(elem, id)
67                            readerControl$reader(elem, readerControl$language, id),                            readerControl$reader(elem, readerControl$language, id),
68                        pGetElem(x),                        pGetElem(x),
69                        id = if (is.null(x$names) || is.na(x$names))                        id = if (is.null(names(x)) || is.na(names(x)))
70                            as.character(seq_len(x$length))                            as.character(seq_len(length(x)))
71                        else x$names,                        else names(x),
72                        SIMPLIFY = FALSE)                        SIMPLIFY = FALSE)
73      else {      else {
74          counter <- 1          counter <- 1
75          while (!eoi(x)) {          while (!eoi(x)) {
76              x <- stepNext(x)              x <- stepNext(x)
77              elem <- getElem(x)              elem <- getElem(x)
78              id <- if (is.null(x$names) || is.na(x$names))              id <- if (is.null(names(x)) || is.na(names(x)))
79                  as.character(counter)                  as.character(counter)
80              else              else
81                  x$names[counter]                  names(x)[counter]
82              doc <- readerControl$reader(elem, readerControl$language, id)              doc <- readerControl$reader(elem, readerControl$language, id)
             if (x$length > 0)  
83                  tdl[[counter]] <- doc                  tdl[[counter]] <- doc
             else  
                 tdl <- c(tdl, list(doc))  
84              counter <- counter + 1              counter <- counter + 1
85          }          }
86      }      }
87      if (!is.null(x$names) && !is.na(x$names))      if (!is.null(names(x)) && !is.na(names(x)))
88          names(tdl) <- x$names          names(tdl) <- names(x)
89    
90      structure(list(content = tdl,      structure(list(content = tdl,
91                     meta = CorpusMeta(),                     meta = CorpusMeta(),
# Line 121  Line 118 
118  .map_name_index <-  .map_name_index <-
119  function(x, i)  function(x, i)
120  {  {
121      if (is.character(i))      if (is.character(i)) {
122          match(i, if (is.null(names(x))) meta(x, "id", "local") else names(x))          n <- names(x$content)
123      else          match(i, if (is.null(n)) meta(x, "id", "local") else n)
124        } else
125          i          i
126  }  }
127    

Legend:
Removed from v.1333  
changed lines
  Added in v.1342

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge