SCM

SCM Repository

[tm] Diff of /trunk/tm/R/reader.R
ViewVC logotype

Diff of /trunk/tm/R/reader.R

Parent Directory | Revision Log | View Patch

revision 694, Sun Dec 31 14:47:46 2006 UTC | revision 697, Fri Jan 5 23:09:12 2007 UTC
# Line 34  Line 34 
34              ""              ""
35    
36          datetimestamp <- as.POSIXct(strptime(xmlValue(node[["DATE"]]), format = "%d-%B-%Y %H:%M:%S"))          datetimestamp <- as.POSIXct(strptime(xmlValue(node[["DATE"]]), format = "%d-%B-%Y %H:%M:%S"))
         description <- ""  
37          id <- xmlAttrs(node)[["NEWID"]]          id <- xmlAttrs(node)[["NEWID"]]
38    
39          # The <TITLE></TITLE> tag is unfortunately NOT obligatory!          # The <TITLE></TITLE> tag is unfortunately NOT obligatory!
# Line 159  Line 158 
158  convert_rcv1_plain <- function(node, ...) {  convert_rcv1_plain <- function(node, ...) {
159      datetimestamp <- as.POSIXct(xmlAttrs(node)[["date"]])      datetimestamp <- as.POSIXct(xmlAttrs(node)[["date"]])
160      id <- xmlAttrs(node)[["itemid"]]      id <- xmlAttrs(node)[["itemid"]]
     origin <- "Reuters Corpus Volume 1 XML"  
161      corpus <- unlist(xmlApply(node[["text"]], xmlValue), use.names = FALSE)      corpus <- unlist(xmlApply(node[["text"]], xmlValue), use.names = FALSE)
162      heading <- xmlValue(node[["title"]])      heading <- xmlValue(node[["title"]])
163    
# Line 179  Line 177 
177      description <- ""      description <- ""
178      id <- xmlAttrs(node)[["NEWID"]]      id <- xmlAttrs(node)[["NEWID"]]
179    
     origin <- "Reuters-21578 XML"  
   
180      # The <BODY></BODY> tag is unfortunately NOT obligatory!      # The <BODY></BODY> tag is unfortunately NOT obligatory!
181      corpus <- if (!is.null(node[["TEXT"]][["BODY"]]))      corpus <- if (!is.null(node[["TEXT"]][["BODY"]]))
182          xmlValue(node[["TEXT"]][["BODY"]])          xmlValue(node[["TEXT"]][["BODY"]])
# Line 196  Line 192 
192      topics <- unlist(xmlApply(node[["TOPICS"]], function(x) xmlValue(x)), use.names = FALSE)      topics <- unlist(xmlApply(node[["TOPICS"]], function(x) xmlValue(x)), use.names = FALSE)
193    
194      new("PlainTextDocument", .Data = corpus, Cached = TRUE, URI = "", Author = author, DateTimeStamp = datetimestamp,      new("PlainTextDocument", .Data = corpus, Cached = TRUE, URI = "", Author = author, DateTimeStamp = datetimestamp,
195          Description = description, ID = id, Origin = origin, Heading = heading, LocalMetaData = list(Topics = topics))          Description = description, ID = id, Origin = "Reuters-21578 XML", Heading = heading, LocalMetaData = list(Topics = topics))
196  }  }

Legend:
Removed from v.694  
changed lines
  Added in v.697

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge