SCM

SCM Repository

[tm] Annotation of /pkg/R/source.R
ViewVC logotype

Annotation of /pkg/R/source.R

Parent Directory | Revision Log


Revision 832 - (view) (download)
Original Path: trunk/tm/R/source.R

1 : feinerer 689 # Author: Ingo Feinerer
2 :    
3 :     # Source objects
4 :    
# Virtual base class for all document sources; it cannot be instantiated
# directly.
# Slots:
#   LoDSupport    - logical flag set by each constructor
#                   (presumably "load on demand" support -- TODO confirm)
#   Position      - numeric cursor; constructors start it at 0 and stepNext()
#                   increments it
#   DefaultReader - reader function associated with the source
#   Encoding      - character encoding name supplied to the constructor
setClass(
    "Source",
    representation(
        LoDSupport    = "logical",
        Position      = "numeric",
        DefaultReader = "function",
        Encoding      = "character",
        "VIRTUAL"
    )
)
11 :    
# Source backed by an in-memory vector: every element of Content is
# treated as one document.
setClass(
    "VectorSource",
    contains = "Source",
    representation = representation(Content = "vector")
)
16 :    
# Source backed by a directory: FileList holds the paths of the files
# found in it, one document per file.
setClass(
    "DirSource",
    contains = "Source",
    representation = representation(FileList = "character")
)
21 :    
# Source backed by a single CSV file: every row is interpreted as one
# document.
#   URI     - whatever the constructor stored to identify the input
#             (the constructors store an unevaluated expression)
#   Content - one character string per CSV row
setClass(
    "CSVSource",
    contains = "Source",
    representation = representation(
        URI     = "ANY",
        Content = "character"
    )
)
27 :    
# Source backed by a single XML file that bundles several Reuters
# documents. Works for both Reuters21578XML and RCV1 XML files.
#   URI     - whatever the constructor stored to identify the input
#             (the constructors store an unevaluated expression)
#   Content - list of the child nodes of the XML root, one per document
setClass(
    "ReutersSource",
    contains = "Source",
    representation = representation(
        URI     = "ANY",
        Content = "list"
    )
)
34 :    
# Source backed by a single XML (RDF) file holding a Gmane mailing list
# archive feed.
#   URI     - whatever the constructor stored to identify the input
#             (the constructors store an unevaluated expression)
#   Content - list of the feed's "item" nodes, one per document
setClass(
    "GmaneSource",
    contains = "Source",
    representation = representation(
        URI     = "ANY",
        Content = "list"
    )
)
40 :    
41 :    
42 :     # Methods for Source objects
43 :    
# Constructor for VectorSource objects.
#   object   - vector whose elements are the documents
#   encoding - encoding name recorded in the Encoding slot
# Returns a VectorSource positioned before the first element (Position 0).
setGeneric(
    "VectorSource",
    function(object, encoding = "UTF-8") standardGeneric("VectorSource")
)

setMethod(
    "VectorSource",
    signature(object = "vector"),
    function(object, encoding = "UTF-8") {
        new("VectorSource",
            Content       = object,
            Encoding      = encoding,
            DefaultReader = readPlain,
            LoDSupport    = FALSE,
            Position      = 0)
    }
)
51 :    
# Constructor for DirSource objects.
#   directory - path of the directory to scan
#   encoding  - encoding name recorded in the Encoding slot
#   recursive - passed to dir(); if TRUE, subdirectories are scanned too
# Returns a DirSource whose FileList holds the full paths of all
# non-directory entries, positioned before the first file (Position 0).
setGeneric(
    "DirSource",
    function(directory, encoding = "UTF-8", recursive = FALSE)
        standardGeneric("DirSource")
)

setMethod(
    "DirSource",
    signature(directory = "character"),
    function(directory, encoding = "UTF-8", recursive = FALSE) {
        entries <- dir(directory, full.names = TRUE, recursive = recursive)
        # file.info() is vectorised and returns a zero-row data frame for an
        # empty listing, so an empty directory yields an empty FileList
        # instead of the "incorrect number of dimensions" error that
        # indexing the result of sapply(entries, file.info) produced.
        is_dir <- file.info(entries)$isdir
        files <- entries[!is_dir]
        new("DirSource", LoDSupport = TRUE, FileList = files,
            Position = 0, DefaultReader = readPlain, Encoding = encoding)
    }
)
62 :    
# Constructor for CSVSource objects. Every row of the CSV input becomes one
# document: the fields of the row are pasted together with single spaces.
#   object   - a file name (character method) or any expression that
#              evaluates to something read.csv() accepts (ANY method)
#   encoding - encoding name; used to open the file in the character method
#              and recorded in the Encoding slot
# The URI slot stores the unevaluated expression that yields the input.
setGeneric(
    "CSVSource",
    function(object, encoding = "UTF-8") standardGeneric("CSVSource")
)

setMethod(
    "CSVSource",
    signature(object = "character"),
    function(object, encoding = "UTF-8") {
        # Build the call that opens the file, keep it for the URI slot,
        # then evaluate it to get the connection.
        object <- substitute(file(object, encoding = encoding))
        conn <- eval(object)
        records <- read.csv(conn)
        docs <- apply(records, 1, paste, collapse = " ")
        new("CSVSource",
            URI           = object,
            Content       = docs,
            Encoding      = encoding,
            DefaultReader = readPlain,
            LoDSupport    = FALSE,
            Position      = 0)
    }
)

setMethod(
    "CSVSource",
    signature(object = "ANY"),
    function(object, encoding = "UTF-8") {
        # Keep the caller's expression for the URI slot, then evaluate it.
        object <- substitute(object)
        conn <- eval(object)
        records <- read.csv(conn)
        docs <- apply(records, 1, paste, collapse = " ")
        new("CSVSource",
            URI           = object,
            Content       = docs,
            Encoding      = encoding,
            DefaultReader = readPlain,
            LoDSupport    = FALSE,
            Position      = 0)
    }
)
82 :    
# Constructor for ReutersSource objects. Reads a whole XML file, parses it
# and stores the children of the root node as the documents.
#   object   - a file name (character method) or any expression that
#              evaluates to a connection (ANY method)
#   encoding - encoding name; used to open the file in the character method
#              and recorded in the Encoding slot
# The URI slot stores the unevaluated expression that yields the input.
setGeneric(
    "ReutersSource",
    function(object, encoding = "UTF-8") standardGeneric("ReutersSource")
)

setMethod(
    "ReutersSource",
    signature(object = "character"),
    function(object, encoding = "UTF-8") {
        # Build the call that opens the file, keep it for the URI slot,
        # then evaluate it to get the connection.
        object <- substitute(file(object, encoding = encoding))
        con <- eval(object)
        # Release the connection even when reading or parsing fails.
        on.exit(close(con), add = TRUE)
        # paste() uses its default separator here, so every line is followed
        # by a space and a newline in the text handed to the parser.
        corpus <- paste(readLines(con), "\n", collapse = "")
        tree <- xmlTreeParse(corpus, asText = TRUE)
        content <- xmlRoot(tree)$children

        new("ReutersSource", LoDSupport = FALSE, URI = object,
            Content = content, Position = 0,
            DefaultReader = readReut21578XML, Encoding = encoding)
    }
)

setMethod(
    "ReutersSource",
    signature(object = "ANY"),
    function(object, encoding = "UTF-8") {
        # Keep the caller's expression for the URI slot, then evaluate it.
        object <- substitute(object)
        con <- eval(object)
        # Release the connection even when reading or parsing fails.
        on.exit(close(con), add = TRUE)
        # paste() uses its default separator here, so every line is followed
        # by a space and a newline in the text handed to the parser.
        corpus <- paste(readLines(con), "\n", collapse = "")
        tree <- xmlTreeParse(corpus, asText = TRUE)
        content <- xmlRoot(tree)$children

        new("ReutersSource", LoDSupport = FALSE, URI = object,
            Content = content, Position = 0,
            DefaultReader = readReut21578XML, Encoding = encoding)
    }
)
110 :    
# Constructor for GmaneSource objects. Reads a whole XML (RDF) feed, parses
# it and keeps the children of the root node that are named "item".
#   object   - a file name (character method) or any expression that
#              evaluates to a connection (ANY method)
#   encoding - encoding name; used to open the file in the character method
#              and recorded in the Encoding slot
# The URI slot stores the unevaluated expression that yields the input.
setGeneric(
    "GmaneSource",
    function(object, encoding = "UTF-8") standardGeneric("GmaneSource")
)

setMethod(
    "GmaneSource",
    signature(object = "character"),
    function(object, encoding = "UTF-8") {
        # Build the call that opens the file, keep it for the URI slot,
        # then evaluate it to get the connection.
        object <- substitute(file(object, encoding = encoding))
        con <- eval(object)
        # Release the connection even when reading or parsing fails.
        on.exit(close(con), add = TRUE)
        # paste() uses its default separator here, so every line is followed
        # by a space and a newline in the text handed to the parser.
        corpus <- paste(readLines(con), "\n", collapse = "")
        tree <- xmlTreeParse(corpus, asText = TRUE)
        content <- xmlRoot(tree)$children
        # Only the feed entries are documents.
        content <- content[names(content) == "item"]

        new("GmaneSource", LoDSupport = FALSE, URI = object,
            Content = content, Position = 0,
            DefaultReader = readGmane, Encoding = encoding)
    }
)

setMethod(
    "GmaneSource",
    signature(object = "ANY"),
    function(object, encoding = "UTF-8") {
        # Keep the caller's expression for the URI slot, then evaluate it.
        object <- substitute(object)
        con <- eval(object)
        # Release the connection even when reading or parsing fails.
        on.exit(close(con), add = TRUE)
        # paste() uses its default separator here, so every line is followed
        # by a space and a newline in the text handed to the parser.
        corpus <- paste(readLines(con), "\n", collapse = "")
        tree <- xmlTreeParse(corpus, asText = TRUE)
        content <- xmlRoot(tree)$children
        # Only the feed entries are documents.
        content <- content[names(content) == "item"]

        new("GmaneSource", LoDSupport = FALSE, URI = object,
            Content = content, Position = 0,
            DefaultReader = readGmane, Encoding = encoding)
    }
)
140 :    
# stepNext(object): returns a copy of the source whose Position slot has
# been advanced by one. The argument itself is not modified.
setGeneric("stepNext", function(object) standardGeneric("stepNext"))

setMethod("stepNext", signature(object = "VectorSource"),
          function(object) {
              advanced <- object
              advanced@Position <- advanced@Position + 1
              advanced
          })

setMethod("stepNext", signature(object = "DirSource"),
          function(object) {
              advanced <- object
              advanced@Position <- advanced@Position + 1
              advanced
          })

setMethod("stepNext", signature(object = "CSVSource"),
          function(object) {
              advanced <- object
              advanced@Position <- advanced@Position + 1
              advanced
          })

setMethod("stepNext", signature(object = "ReutersSource"),
          function(object) {
              advanced <- object
              advanced@Position <- advanced@Position + 1
              advanced
          })

setMethod("stepNext", signature(object = "GmaneSource"),
          function(object) {
              advanced <- object
              advanced@Position <- advanced@Position + 1
              advanced
          })
172 :    
# getElem(object): returns the element at the source's current Position as
# a list with two components:
#   content - the document text
#   uri     - an expression identifying where the content came from, or NULL
setGeneric("getElem", function(object) standardGeneric("getElem"))

# VectorSource: the element of Content at the current position; no URI.
setMethod("getElem",
          signature(object = "VectorSource"),
          function(object) {
              list(content = object@Content[object@Position],
                   uri = NULL)
          })

# DirSource: reads the file at the current position. The URI is a call to
# file() with the file name and encoding filled in by substitute().
setMethod("getElem",
          signature(object = "DirSource"),
          function(object) {
              filename <- object@FileList[object@Position]
              encoding <- object@Encoding
              list(content = readLines(filename, encoding = encoding),
                   uri = substitute(file(filename, encoding = encoding)))
          })

# CSVSource: the pre-built row string at the current position.
setMethod("getElem",
          signature(object = "CSVSource"),
          function(object) {
              list(content = object@Content[object@Position],
                   uri = object@URI)
          })

# ReutersSource: serialises the XML node at the current position to text.
setMethod("getElem",
          signature(object = "ReutersSource"),
          function(object) {
              # Construct a character representation from the XMLNode.
              # local = TRUE keeps the buffer variable in this function's
              # frame; the default would create or overwrite a variable
              # named "virtual.file" in the user's workspace.
              con <- textConnection("virtual.file", "w", local = TRUE)
              saveXML(object@Content[[object@Position]], con)
              # Closing completes the buffer variable, so read it afterwards.
              close(con)

              list(content = virtual.file, uri = object@URI)
          })

# GmaneSource: serialises the XML node at the current position to text.
setMethod("getElem",
          signature(object = "GmaneSource"),
          function(object) {
              # Construct a character representation from the XMLNode.
              # local = TRUE keeps the buffer variable in this function's
              # frame; the default would create or overwrite a variable
              # named "virtual.file" in the user's workspace.
              con <- textConnection("virtual.file", "w", local = TRUE)
              saveXML(object@Content[[object@Position]], con)
              # Closing completes the buffer variable, so read it afterwards.
              close(con)

              list(content = virtual.file, uri = object@URI)
          })
214 :    
# eoi(object): TRUE once the source's Position has reached (or passed) the
# number of elements it holds, FALSE while elements remain.
setGeneric("eoi", function(object) standardGeneric("eoi"))

setMethod("eoi", signature(object = "VectorSource"),
          function(object) {
              if (object@Position >= length(object@Content)) TRUE else FALSE
          })

setMethod("eoi", signature(object = "DirSource"),
          function(object) {
              if (object@Position >= length(object@FileList)) TRUE else FALSE
          })

setMethod("eoi", signature(object = "CSVSource"),
          function(object) {
              if (object@Position >= length(object@Content)) TRUE else FALSE
          })

setMethod("eoi", signature(object = "ReutersSource"),
          function(object) {
              if (object@Position >= length(object@Content)) TRUE else FALSE
          })

setMethod("eoi", signature(object = "GmaneSource"),
          function(object) {
              if (object@Position >= length(object@Content)) TRUE else FALSE
          })

R-Forge@R-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business University of Wisconsin - Madison Powered By FusionForge