SCM

SCM Repository

[tm] Diff of /trunk/tm/R/textdoccol.R
ViewVC logotype

Diff of /trunk/tm/R/textdoccol.R

Parent Directory | Revision Log | View Patch

revision 777, Tue Aug 28 07:19:12 2007 UTC revision 780, Sat Sep 29 13:24:17 2007 UTC
# Line 216  Line 216 
216                    Heading = Heading(object), Language = Language(object))                    Heading = Heading(object), Language = Language(object))
217            })            })
218    
 setGeneric("tmTolower", function(object, ...) standardGeneric("tmTolower"))  
 setMethod("tmTolower",  
           signature(object = "PlainTextDocument"),  
           function(object, ...) {  
               Corpus(object) <- tolower(object)  
               return(object)  
           })  
   
 setGeneric("stripWhitespace", function(object, ...) standardGeneric("stripWhitespace"))  
 setMethod("stripWhitespace",  
           signature(object = "PlainTextDocument"),  
           function(object, ...) {  
               Corpus(object) <- gsub("[[:space:]]+", " ", object)  
               return(object)  
           })  
   
 setGeneric("stemDoc", function(object, language = "english", ...) standardGeneric("stemDoc"))  
 setMethod("stemDoc",  
           signature(object = "PlainTextDocument"),  
           function(object, language = "english", ...) {  
               splittedCorpus <- unlist(strsplit(object, " ", fixed = TRUE))  
               stemmedCorpus <- if (require("Rstem", quietly = TRUE))  
                   Rstem::wordStem(splittedCorpus, language)  
               else  
                   SnowballStemmer(splittedCorpus, Weka_control(S = language))  
               Corpus(object) <- paste(stemmedCorpus, collapse = " ")  
               return(object)  
           })  
   
 setGeneric("removePunctuation", function(object, ...) standardGeneric("removePunctuation"))  
 setMethod("removePunctuation",  
           signature(object = "PlainTextDocument"),  
           function(object, ...) {  
               Corpus(object) <- gsub("[[:punct:]]+", "", Corpus(object))  
               return(object)  
           })  
   
 setGeneric("removeWords", function(object, stopwords, ...) standardGeneric("removeWords"))  
 setMethod("removeWords",  
           signature(object = "PlainTextDocument", stopwords = "character"),  
           function(object, stopwords, ...) {  
               splittedCorpus <- unlist(strsplit(object, " ", fixed = TRUE))  
               noStopwordsCorpus <- splittedCorpus[!splittedCorpus %in% stopwords]  
               Corpus(object) <- paste(noStopwordsCorpus, collapse = " ")  
               return(object)  
           })  
   
 setGeneric("replaceWords", function(object, words, by, ...) standardGeneric("replaceWords"))  
 setMethod("replaceWords",  
           signature(object = "PlainTextDocument", words = "character", by = "character"),  
           function(object, words, by, ...) {  
               pattern <- paste(words, collapse = "|")  
               Corpus(object) <- gsub(pattern, by, Corpus(object))  
               return(object)  
           })  
   
219  setGeneric("tmFilter", function(object, ..., FUN = sFilter, doclevel = FALSE) standardGeneric("tmFilter"))  setGeneric("tmFilter", function(object, ..., FUN = sFilter, doclevel = FALSE) standardGeneric("tmFilter"))
220  setMethod("tmFilter",  setMethod("tmFilter",
221            signature(object = "TextDocCol"),            signature(object = "TextDocCol"),

Legend:
Removed from v.777  
changed lines
  Added in v.780

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge