SCM

SCM Repository

[tm] Diff of /pkg/R/textdoccol.R
ViewVC logotype

Diff of /pkg/R/textdoccol.R

Parent Directory | Revision Log | View Patch

revision 712, Sun Mar 4 15:18:36 2007 UTC | revision 713, Wed Mar 14 13:44:11 2007 UTC
# Line 169  Line 169 
169                return(object)                return(object)
170            })            })
171    
172  setGeneric("stemDoc", function(object, ...) standardGeneric("stemDoc"))  setGeneric("stemDoc", function(object, language = "english", ...) standardGeneric("stemDoc"))
173  setMethod("stemDoc",  setMethod("stemDoc",
174            signature(object = "PlainTextDocument"),            signature(object = "PlainTextDocument"),
175            function(object, ...) {            function(object, language = "english", ...) {
               require("Rstem")  
176                splittedCorpus <- unlist(strsplit(object, " ", fixed = TRUE))                splittedCorpus <- unlist(strsplit(object, " ", fixed = TRUE))
177                stemmedCorpus <- Rstem::wordStem(splittedCorpus)                stemmedCorpus <- if (require("Rstem"))
178                      Rstem::wordStem(splittedCorpus, language)
179                  else
180                      SnowballStemmer(splittedCorpus, Weka_control(S = language))
181                Corpus(object) <- paste(stemmedCorpus, collapse = " ")                Corpus(object) <- paste(stemmedCorpus, collapse = " ")
182                return(object)                return(object)
183            })            })
# Line 184  Line 186 
186  setMethod("removeWords",  setMethod("removeWords",
187            signature(object = "PlainTextDocument", stopwords = "character"),            signature(object = "PlainTextDocument", stopwords = "character"),
188            function(object, stopwords, ...) {            function(object, stopwords, ...) {
               require("Rstem")  
189                splittedCorpus <- unlist(strsplit(object, " ", fixed = TRUE))                splittedCorpus <- unlist(strsplit(object, " ", fixed = TRUE))
190                noStopwordsCorpus <- splittedCorpus[!splittedCorpus %in% stopwords]                noStopwordsCorpus <- splittedCorpus[!splittedCorpus %in% stopwords]
191                Corpus(object) <- paste(noStopwordsCorpus, collapse = " ")                Corpus(object) <- paste(noStopwordsCorpus, collapse = " ")

Legend:
Removed from v.712
Changed lines
Added in v.713

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge