SCM

SCM Repository

[tm] Diff of /pkg/R/corpus.R
ViewVC logotype

Diff of /pkg/R/corpus.R

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 837, Wed Apr 23 09:16:25 2008 UTC revision 856, Fri Jun 6 11:45:39 2008 UTC
# Line 220  Line 220 
220                return(object)                return(object)
221            })            })
222  setMethod("asPlain",  setMethod("asPlain",
223            signature(object = "XMLTextDocument", FUN = "function"),            signature(object = "XMLTextDocument"),
224            function(object, FUN, ...) {            function(object, FUN, ...) {
225                corpus <- Content(object)                corpus <- Content(object)
226    
# Line 245  Line 245 
245  setMethod("asPlain",  setMethod("asPlain",
246            signature(object = "RCV1Document"),            signature(object = "RCV1Document"),
247            function(object, FUN, ...) {            function(object, FUN, ...) {
248                FUN <- convertRCV1Plain                return(convertRCV1Plain(object, ...))
               corpus <- Content(object)  
   
               # As XMLDocument is no native S4 class, restore valid information  
               class(corpus) <- "XMLDocument"  
               names(corpus) <- c("doc","dtd")  
   
               return(FUN(xmlRoot(corpus), ...))  
249            })            })
250  setMethod("asPlain",  setMethod("asPlain",
251            signature(object = "NewsgroupDocument"),            signature(object = "NewsgroupDocument"),
# Line 272  Line 265 
265                    LocalMetaData = LocalMetaData(object))                    LocalMetaData = LocalMetaData(object))
266            })            })
267    
268  setGeneric("tmFilter", function(object, ..., FUN = sFilter, doclevel = FALSE) standardGeneric("tmFilter"))  setGeneric("tmFilter", function(object, ..., FUN = searchFullText, doclevel = TRUE) standardGeneric("tmFilter"))
269  setMethod("tmFilter",  setMethod("tmFilter",
270            signature(object = "Corpus"),            signature(object = "Corpus"),
271            function(object, ..., FUN = sFilter, doclevel = FALSE) {            function(object, ..., FUN = searchFullText, doclevel = TRUE) {
272                  if (!is.null(attr(FUN, "doclevel")))
273                      doclevel <- attr(FUN, "doclevel")
274                if (doclevel)                if (doclevel)
275                    return(object[sapply(object, FUN, ..., DMetaData = DMetaData(object))])                    return(object[sapply(object, FUN, ..., DMetaData = DMetaData(object))])
276                else                else
277                    return(object[FUN(object, ...)])                    return(object[FUN(object, ...)])
278            })            })
279    
280  setGeneric("tmIndex", function(object, ..., FUN = sFilter, doclevel = FALSE) standardGeneric("tmIndex"))  setGeneric("tmIndex", function(object, ..., FUN = searchFullText, doclevel = TRUE) standardGeneric("tmIndex"))
281  setMethod("tmIndex",  setMethod("tmIndex",
282            signature(object = "Corpus"),            signature(object = "Corpus"),
283            function(object, ..., FUN = sFilter, doclevel = FALSE) {            function(object, ..., FUN = searchFullText, doclevel = TRUE) {
284                  if (!is.null(attr(FUN, "doclevel")))
285                      doclevel <- attr(FUN, "doclevel")
286                if (doclevel)                if (doclevel)
287                    return(sapply(object, FUN, ..., DMetaData = DMetaData(object)))                    return(sapply(object, FUN, ..., DMetaData = DMetaData(object)))
288                else                else
289                    return(FUN(object, ...))                    return(FUN(object, ...))
290            })            })
291    
 sFilter <- function(object, s, ...) {  
     con <- textConnection(s)  
     tokens <- scan(con, "character", quiet = TRUE)  
     close(con)  
     localMetaNames <- unique(names(sapply(object, LocalMetaData)))  
     localMetaTokens <- localMetaNames[localMetaNames %in% tokens]  
     n <- names(DMetaData(object))  
     tags <- c("Author", "DateTimeStamp", "Description", "ID", "Origin", "Heading", "Language", localMetaTokens)  
     query.df <- DMetaData(prescindMeta(object, tags))  
     if (DBControl(object)[["useDb"]])  
         DMetaData(object) <- DMetaData(object)[, setdiff(n, tags), drop = FALSE]  
     # Rename to avoid name conflicts  
     names(query.df)[names(query.df) == "Author"] <- "author"  
     names(query.df)[names(query.df) == "DateTimeStamp"] <- "datetimestamp"  
     names(query.df)[names(query.df) == "Description"] <- "description"  
     names(query.df)[names(query.df) == "ID"] <- "identifier"  
     names(query.df)[names(query.df) == "Origin"] <- "origin"  
     names(query.df)[names(query.df) == "Heading"] <- "heading"  
     names(query.df)[names(query.df) == "Language"] <- "language"  
     attach(query.df)  
     try(result <- rownames(query.df) %in% row.names(query.df[eval(parse(text = s)), ]))  
     detach(query.df)  
     return(result)  
 }  
   
292  setGeneric("appendElem", function(object, data, meta = NULL) standardGeneric("appendElem"))  setGeneric("appendElem", function(object, data, meta = NULL) standardGeneric("appendElem"))
293  setMethod("appendElem",  setMethod("appendElem",
294            signature(object = "Corpus", data = "TextDocument"),            signature(object = "Corpus", data = "TextDocument"),
# Line 363  Line 335 
335                for (m in meta) {                for (m in meta) {
336                    if (m %in% c("Author", "DateTimeStamp", "Description", "ID", "Origin", "Heading", "Language")) {                    if (m %in% c("Author", "DateTimeStamp", "Description", "ID", "Origin", "Heading", "Language")) {
337                        local.m <- lapply(object, m)                        local.m <- lapply(object, m)
338                          local.m <- sapply(local.m, paste, collapse = " ")
339                        local.m <- lapply(local.m, function(x) if (is.null(x)) return(NA) else return(x))                        local.m <- lapply(local.m, function(x) if (is.null(x)) return(NA) else return(x))
340                        local.m <- unlist(local.m)                        local.m <- unlist(local.m)
341                        DMetaData(object) <- cbind(DMetaData(object), data.frame(m = local.m, stringsAsFactors = FALSE))                        DMetaData(object) <- cbind(DMetaData(object), data.frame(m = local.m, stringsAsFactors = FALSE))

Legend:
Removed from v.837  
changed lines
  Added in v.856

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge