SCM

SCM Repository

[tm] Diff of /pkg/R/corpus.R
ViewVC logotype

Diff of /pkg/R/corpus.R

Parent Directory | Revision Log | View Patch

revision 35, Mon Dec 26 10:16:10 2005 UTC | revision 36, Wed Jan 11 15:42:56 2006 UTC
# Line 62  Line 62 
62      timestamp <- xmlAttrs(node)[["date"]]      timestamp <- xmlAttrs(node)[["date"]]
63      description <- "Not yet implemented"      description <- "Not yet implemented"
64      id <- as.integer(xmlAttrs(node)[["itemid"]])      id <- as.integer(xmlAttrs(node)[["itemid"]])
65      origin <- "Not yet implemented"      origin <- "Reuters Corpus Volume 1 XML"
66      corpus <- unlist(xmlApply(node[["text"]], xmlValue), use.names = FALSE)      corpus <- unlist(xmlApply(node[["text"]], xmlValue), use.names = FALSE)
67    
68      if (stripWhiteSpace)      if (stripWhiteSpace)
# Line 78  Line 78 
78    
79  # Parse a <REUTERS></REUTERS> element from a well-formed Reuters-21578 XML file  # Parse a <REUTERS></REUTERS> element from a well-formed Reuters-21578 XML file
80  parseReuters <- function(node, stripWhiteSpace = FALSE, toLower = FALSE) {  parseReuters <- function(node, stripWhiteSpace = FALSE, toLower = FALSE) {
81      author <- "Not yet implemented"      # The <AUTHOR></AUTHOR> tag is unfortunately NOT obligatory!
82        if (!is.null(node[["TEXT"]][["AUTHOR"]]))
83            author <- xmlValue(node[["TEXT"]][["AUTHOR"]])
84        else
85            author <- ""
86    
87      timestamp <- xmlValue(node[["DATE"]])      timestamp <- xmlValue(node[["DATE"]])
88      description <- "Not yet implemented"      description <- ""
89      id <- as.integer(xmlAttrs(node)[["NEWID"]])      id <- as.integer(xmlAttrs(node)[["NEWID"]])
90    
91      origin <- "Not yet implemented"      origin <- "Reuters-21578 XML"
92    
93      # The <BODY></BODY> tag is unfortunately NOT obligatory!      # The <BODY></BODY> tag is unfortunately NOT obligatory!
94      if (!is.null(node[["TEXT"]][["BODY"]]))      if (!is.null(node[["TEXT"]][["BODY"]]))

Legend:
Removed from v.35  
changed lines
  Added in v.36

R-Forge@R-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business University of Wisconsin - Madison Powered By FusionForge