SCM

SCM Repository

[tm] Diff of /pkg/R/preprocess.R
ViewVC logotype

Diff of /pkg/R/preprocess.R

Parent Directory | Revision Log | View Patch

revision 885, Thu Jan 29 09:34:44 2009 UTC | revision 886, Thu Jan 29 22:47:34 2009 UTC
# Line 2  Line 2 
2    
3  # Preprocess the Reuters21578 XML data  # Preprocess the Reuters21578 XML data
4  preprocessReut21578XML <- function(ReutersDir, ReutersOapfDir, fixEnc = TRUE) {  preprocessReut21578XML <- function(ReutersDir, ReutersOapfDir, fixEnc = TRUE) {
5        require("XML")
6    
7      dir.create(ReutersOapfDir, recursive = TRUE)      dir.create(ReutersOapfDir, recursive = TRUE)
8      files <- dir(ReutersDir, pattern = "\\.xml", full.names = TRUE)      files <- dir(ReutersDir, pattern = "\\.xml", full.names = TRUE)
9    
# Line 16  Line 18 
18      # Write out each article in a seperate file      # Write out each article in a seperate file
19      counter <- 1      counter <- 1
20      for (f in files) {      for (f in files) {
21          tree <- xmlTreeParse(f)          tree <- XML::xmlTreeParse(f)
22          xmlApply(xmlRoot(tree),          XML::xmlApply(XML::xmlRoot(tree),
23                   function(article) {                   function(article) {
24                       output.file <- paste(ReutersOapfDir, "reut-",                       output.file <- paste(ReutersOapfDir, "reut-",
25                                            gsub(" ", "0", format(counter, width = 5)),                                            gsub(" ", "0", format(counter, width = 5)),
26                                            ".xml", sep = "")                                            ".xml", sep = "")
27                       counter <<- counter + 1                       counter <<- counter + 1
28                       con <- file(output.file, "w")                       con <- file(output.file, "w")
29                       saveXML(article, file = con)                       XML::saveXML(article, file = con)
30                       close(con)                       close(con)
31                   })                   })
32      }      }

Legend:
Removed from v.885  
changed lines
  Added in v.886

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business | Powered By FusionForge