SCM

SCM Repository

[tm] Annotation of /pkg/R/source.R
ViewVC logotype

Annotation of /pkg/R/source.R

Parent Directory Parent Directory | Revision Log Revision Log


Revision 810 - (view) (download)
Original Path: trunk/tm/R/source.R

1 : feinerer 689 # Author: Ingo Feinerer
2 :    
3 :     # Source objects
4 :    
# Virtual base class extended by every concrete source class.
#
# Slots:
#   LoDSupport    - logical flag; the constructors in this file set it to
#                   TRUE for DirSource and FALSE for the other sources.
#   Position      - numeric index of the current element; constructors
#                   initialise it to 0 and stepNext() increments it.
#   DefaultReader - reader function stored by the constructor
#                   (e.g. readPlain).
#   Encoding      - encoding name handed to the constructor.
setClass(
  "Source",
  representation = representation(
    LoDSupport    = "logical",
    Position      = "numeric",
    DefaultReader = "function",
    Encoding      = "character",
    "VIRTUAL"
  )
)
11 :    
# A directory with files; FileList holds the file paths as a character
# vector. All remaining slots are inherited from Source.
setClass(
  "DirSource",
  representation = representation(FileList = "character"),
  contains = "Source"
)
16 :    
# A single CSV file where each line is interpreted as a document.
#   URI     - unevaluated expression describing where the data came from
#   Content - one character string per CSV row
setClass(
  "CSVSource",
  representation = representation(
    URI     = "ANY",
    Content = "character"
  ),
  contains = "Source"
)
22 :    
# A single XML file consisting of several Reuters documents.
# Works both for Reuters21578XML and RCV1 XML files.
#   URI     - unevaluated expression describing where the data came from
#   Content - list of the child nodes of the parsed XML root
setClass(
  "ReutersSource",
  representation = representation(
    URI     = "ANY",
    Content = "list"
  ),
  contains = "Source"
)
29 :    
# A single XML (RDF) file containing Gmane mailing list archive feeds.
#   URI     - unevaluated expression describing where the data came from
#   Content - list of the "item" child nodes of the parsed XML root
setClass(
  "GmaneSource",
  representation = representation(
    URI     = "ANY",
    Content = "list"
  ),
  contains = "Source"
)
35 :    
36 :    
37 :     # Methods for Source objects
38 :    
# Constructor generic for directory based sources.
setGeneric(
  "DirSource",
  function(directory, encoding = "UTF-8", recursive = FALSE)
    standardGeneric("DirSource")
)

# Create a DirSource from a directory path.
#
# Arguments:
#   directory - character path; its entries are listed with dir().
#   encoding  - stored in the Encoding slot (default "UTF-8").
#   recursive - passed on to dir(); TRUE also lists subdirectories' files.
#
# Value: a DirSource whose FileList contains every listed entry that is
# not a directory, with Position 0 and readPlain as DefaultReader.
setMethod(
  "DirSource",
  signature(directory = "character"),
  function(directory, encoding = "UTF-8", recursive = FALSE) {
    entries <- dir(directory, full.names = TRUE, recursive = recursive)

    # file.info() is vectorised and returns a zero-row data frame for an
    # empty directory, so this also works when there are no entries.
    isdir <- file.info(entries)$isdir

    # isdir is NA for entries that cannot be inspected (e.g. dangling
    # links); drop them instead of producing NA file names.
    files <- entries[!is.na(isdir) & !isdir]

    new("DirSource", LoDSupport = TRUE, FileList = files,
        Position = 0, DefaultReader = readPlain, Encoding = encoding)
  }
)
49 :    
# Constructor generic for CSV based sources.
setGeneric(
  "CSVSource",
  function(object, encoding = "UTF-8") standardGeneric("CSVSource")
)

# Create a CSVSource from a file name.
#
# Arguments:
#   object   - character file name of the CSV file.
#   encoding - encoding used to open the file; also stored in the
#              Encoding slot (default "UTF-8").
#
# Value: a CSVSource with one Content element per CSV row (the row's
# fields pasted together with single spaces), Position 0, and the
# unevaluated file() call as URI.
setMethod(
  "CSVSource",
  signature(object = "character"),
  function(object, encoding = "UTF-8") {
    # The URI keeps the call as written by the caller.
    uri <- substitute(file(object, encoding = encoding))

    # Open the file from the argument values directly. Evaluating the
    # captured expression inside this frame cannot see variables that
    # are local to the calling function.
    con <- file(object, encoding = encoding)

    # read.csv() opens an unopened connection and closes it when done.
    content <- apply(read.csv(con), 1, paste, collapse = " ")

    new("CSVSource", LoDSupport = FALSE, URI = uri,
        Content = content, Position = 0, DefaultReader = readPlain,
        Encoding = encoding)
  }
)

# Create a CSVSource from anything read.csv() accepts as input,
# typically a connection such as file(...) or url(...).
#
# Arguments:
#   object   - the input object; the expression the caller wrote is
#              stored as URI.
#   encoding - stored in the Encoding slot only; it is not applied to
#              the supplied object.
setMethod(
  "CSVSource",
  signature(object = "ANY"),
  function(object, encoding = "UTF-8") {
    uri <- substitute(object)

    # Use the value already computed for method dispatch. Evaluating the
    # expression a second time would create a second connection and
    # leave the first one open and unused.
    con <- object

    content <- apply(read.csv(con), 1, paste, collapse = " ")

    new("CSVSource", LoDSupport = FALSE, URI = uri,
        Content = content, Position = 0, DefaultReader = readPlain,
        Encoding = encoding)
  }
)
69 :    
# Constructor generic for Reuters XML sources.
setGeneric(
  "ReutersSource",
  function(object, encoding = "UTF-8") standardGeneric("ReutersSource")
)

# Create a ReutersSource from a file name.
#
# Arguments:
#   object   - character file name of the XML file.
#   encoding - encoding used to open the file; also stored in the
#              Encoding slot (default "UTF-8").
#
# Value: a ReutersSource whose Content is the list of child nodes of the
# XML root, with Position 0 and the unevaluated file() call as URI.
setMethod(
  "ReutersSource",
  signature(object = "character"),
  function(object, encoding = "UTF-8") {
    # The URI keeps the call as written by the caller.
    uri <- substitute(file(object, encoding = encoding))

    # Open the file from the argument values directly. Evaluating the
    # captured expression inside this frame cannot see variables that
    # are local to the calling function.
    con <- file(object, encoding = encoding)
    # Release the connection even if reading or parsing fails.
    on.exit(close(con))

    corpus <- paste(readLines(con), "\n", collapse = "")
    tree <- xmlTreeParse(corpus, asText = TRUE)
    content <- xmlRoot(tree)$children

    new("ReutersSource", LoDSupport = FALSE, URI = uri,
        Content = content, Position = 0,
        DefaultReader = readReut21578XML, Encoding = encoding)
  }
)

# Create a ReutersSource from a connection (e.g. file(...) or url(...)).
#
# Arguments:
#   object   - the connection to read; the expression the caller wrote
#              is stored as URI. The connection is closed on exit.
#   encoding - stored in the Encoding slot only; it is not applied to
#              the supplied connection.
setMethod(
  "ReutersSource",
  signature(object = "ANY"),
  function(object, encoding = "UTF-8") {
    uri <- substitute(object)

    # Use the value already computed for method dispatch. Evaluating the
    # expression a second time would create a second connection and
    # leave the first one open and unused.
    con <- object
    on.exit(close(con))

    corpus <- paste(readLines(con), "\n", collapse = "")
    tree <- xmlTreeParse(corpus, asText = TRUE)
    content <- xmlRoot(tree)$children

    new("ReutersSource", LoDSupport = FALSE, URI = uri,
        Content = content, Position = 0,
        DefaultReader = readReut21578XML, Encoding = encoding)
  }
)
97 :    
# Constructor generic for Gmane RSS/RDF feed sources.
setGeneric(
  "GmaneSource",
  function(object, encoding = "UTF-8") standardGeneric("GmaneSource")
)

# Create a GmaneSource from a file name.
#
# Arguments:
#   object   - character file name of the XML (RDF) feed.
#   encoding - encoding used to open the file; also stored in the
#              Encoding slot (default "UTF-8").
#
# Value: a GmaneSource whose Content is the list of the XML root's child
# nodes named "item", with Position 0 and the unevaluated file() call
# as URI.
setMethod(
  "GmaneSource",
  signature(object = "character"),
  function(object, encoding = "UTF-8") {
    # The URI keeps the call as written by the caller.
    uri <- substitute(file(object, encoding = encoding))

    # Open the file from the argument values directly. Evaluating the
    # captured expression inside this frame cannot see variables that
    # are local to the calling function.
    con <- file(object, encoding = encoding)
    # Release the connection even if reading or parsing fails.
    on.exit(close(con))

    corpus <- paste(readLines(con), "\n", collapse = "")
    tree <- xmlTreeParse(corpus, asText = TRUE)
    nodes <- xmlRoot(tree)$children
    # Only the <item> children are kept.
    content <- nodes[names(nodes) == "item"]

    new("GmaneSource", LoDSupport = FALSE, URI = uri,
        Content = content, Position = 0, DefaultReader = readGmane,
        Encoding = encoding)
  }
)

# Create a GmaneSource from a connection (e.g. file(...) or url(...)).
#
# Arguments:
#   object   - the connection to read; the expression the caller wrote
#              is stored as URI. The connection is closed on exit.
#   encoding - stored in the Encoding slot only; it is not applied to
#              the supplied connection.
setMethod(
  "GmaneSource",
  signature(object = "ANY"),
  function(object, encoding = "UTF-8") {
    uri <- substitute(object)

    # Use the value already computed for method dispatch. Evaluating the
    # expression a second time would create a second connection and
    # leave the first one open and unused.
    con <- object
    on.exit(close(con))

    corpus <- paste(readLines(con), "\n", collapse = "")
    tree <- xmlTreeParse(corpus, asText = TRUE)
    nodes <- xmlRoot(tree)$children
    # Only the <item> children are kept.
    content <- nodes[names(nodes) == "item"]

    new("GmaneSource", LoDSupport = FALSE, URI = uri,
        Content = content, Position = 0, DefaultReader = readGmane,
        Encoding = encoding)
  }
)
127 :    
# stepNext(object): return a copy of the source whose Position slot has
# been advanced by one. The four source classes share the same logic.
setGeneric(
  "stepNext",
  function(object) standardGeneric("stepNext")
)

setMethod(
  "stepNext",
  signature(object = "DirSource"),
  function(object) {
    advanced <- object@Position + 1
    object@Position <- advanced
    object
  }
)

setMethod(
  "stepNext",
  signature(object = "CSVSource"),
  function(object) {
    advanced <- object@Position + 1
    object@Position <- advanced
    object
  }
)

setMethod(
  "stepNext",
  signature(object = "ReutersSource"),
  function(object) {
    advanced <- object@Position + 1
    object@Position <- advanced
    object
  }
)

setMethod(
  "stepNext",
  signature(object = "GmaneSource"),
  function(object) {
    advanced <- object@Position + 1
    object@Position <- advanced
    object
  }
)
153 :    
# getElem(object): return the element at the source's current Position
# as list(content = <character vector>, uri = <unevaluated expression>).
setGeneric(
  "getElem",
  function(object) standardGeneric("getElem")
)

# DirSource: read the lines of the file at the current position.
# The uri is a file() call with the file name and encoding filled in.
setMethod(
  "getElem",
  signature(object = "DirSource"),
  function(object) {
    filename <- object@FileList[object@Position]
    encoding <- object@Encoding
    list(content = readLines(filename, encoding = encoding),
         # filename and encoding are ordinary local variables here, so
         # substitute() inserts their values into the call.
         uri = substitute(file(filename, encoding = encoding)))
  }
)

# CSVSource: the content was read by the constructor; pick the current row.
setMethod(
  "getElem",
  signature(object = "CSVSource"),
  function(object) {
    list(content = object@Content[object@Position],
         uri = object@URI)
  }
)

# ReutersSource: serialise the XML node at the current position.
setMethod(
  "getElem",
  signature(object = "ReutersSource"),
  function(object) {
    # Construct a character representation from the XMLNode.
    # local = TRUE creates `virtual.file` in this function's frame; the
    # default would create (or overwrite) a variable of that name in the
    # user's workspace.
    con <- textConnection("virtual.file", "w", local = TRUE)
    saveXML(object@Content[[object@Position]], con)
    # Closing flushes any pending output into `virtual.file`.
    close(con)

    list(content = virtual.file, uri = object@URI)
  }
)

# GmaneSource: serialise the XML node at the current position.
setMethod(
  "getElem",
  signature(object = "GmaneSource"),
  function(object) {
    # Construct a character representation from the XMLNode.
    # local = TRUE creates `virtual.file` in this function's frame; the
    # default would create (or overwrite) a variable of that name in the
    # user's workspace.
    con <- textConnection("virtual.file", "w", local = TRUE)
    saveXML(object@Content[[object@Position]], con)
    # Closing flushes any pending output into `virtual.file`.
    close(con)

    list(content = virtual.file, uri = object@URI)
  }
)
189 :    
# eoi(object): "end of input" test. TRUE once Position has reached the
# number of available elements, FALSE while elements remain.
setGeneric(
  "eoi",
  function(object) standardGeneric("eoi")
)

# DirSource: elements are the entries of FileList.
setMethod(
  "eoi",
  signature(object = "DirSource"),
  function(object) {
    if (object@Position >= length(object@FileList)) TRUE else FALSE
  }
)

# CSVSource: elements are the entries of Content.
setMethod(
  "eoi",
  signature(object = "CSVSource"),
  function(object) {
    if (object@Position >= length(object@Content)) TRUE else FALSE
  }
)

# ReutersSource: elements are the entries of Content.
setMethod(
  "eoi",
  signature(object = "ReutersSource"),
  function(object) {
    if (object@Position >= length(object@Content)) TRUE else FALSE
  }
)

# GmaneSource: elements are the entries of Content.
setMethod(
  "eoi",
  signature(object = "GmaneSource"),
  function(object) {
    if (object@Position >= length(object@Content)) TRUE else FALSE
  }
)

R-Forge@R-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business University of Wisconsin - Madison Powered By FusionForge