################################################################# RinWord <- function(rtf){ # function convert rtf file to R script, # source the script, # and return the commands, output, and pictures to the rtf file # D= T or D=F sets diagnostic messages ################################################################ # preparation # check that rtf file is in list.files and define script and log files # might be embarrassing connections after source failure closeAllConnections() if (!(rtf %in% list.files())) return(paste(rtf, " not found", sep="")) # define script and log files frtf <- sub(".rtf", "", rtf) fpscript <- paste( frtf, ".R", sep="") flog <-paste(frtf, ".log", sep="") ################################################################## # convert rtf file to R script Bigscript <- rtftoR(rtf) # need to handle error returns better if(length(Bigscript)==1) cat("Error:", Bigscript, "\n");stop # select subset of Bigscript that includes comments, R commands script <- Bigscript$u2 # add the picture function for each picture pscript <- addPicture(script) if(length(pscript)==1) cat("Error:", pscript, "\n"); stop # copy script to file cat(pscript, file=fpscript, sep="\n") # report conclude first step cat("rtf to R" , "\n") ################################################################## # source the pscript, sink it into log file con <- file(flog) sink(con, append = FALSE, type="output") # echo all input and not truncate 150+ character lines... source(fpscript, echo=TRUE, max.deparse.length=100000) # Restore output to console sink() sink(type="message") ScanQ(flog) # Announce completion of source output in log cat("R to log", "\n") ######################################################### # Convert log output to rtf format uu <- logtortf(flog) # reassemble beginning, middle, end fragments uu <- c(Bigscript$u1, uu, Bigscript$u3) cat(file=rtf, uu, sep="\n") # Announce conversion cat( "log to rtf", "\n") ############################################################ # close connections and open rtf file in word closeAllConnections() # would like to reopen word file, but word gets offended # DowordQ(paste(getwd(), "/", rtf, sep="")) invisible() } ####################################################################### rtftoR <- function(file){ # identify R commands, R output, word comments, pictures, convert to R script f <- ScanQ(file) #paste them all together cf <- paste(f, collapse="") PrintQ(ncurlyQ(cf), " curlies") u <- ncurlyQ(cf) if(u$lc != u$rc) return("curlies dont match") # make sure fonts include courier ourfonts <-"{\\\\f2\\\\fbidi \\\\fmodern\\\\fcharset0\\\\fprq1{\\\\*\\\\panose 02070309020205020404}Courier New;}" if(!grepl("Courier", cf)) cf <- sub("fonttbl", paste("fonttbl", ourfonts, sep=""), cf) # treat \line like \par vf <- gsub("\\\\line ", "\\\\par ", cf) PrintQ(ncurlyQ(vf), " curlies") # split vf on \\par v <- SplitQ(vf, "\\\\par", "\\par") PrintQ(length(v), "after splitting on \\par") PrintQ(ncurlyQ(v), " curlies") # get maximum range of text endtext <- grep("504b03", v) PrintQ(endtext, "endtext") vv <- UnwrapQ(v) # if no function file , source functions # if( !( "functions" %in% list.files() ) ){ Functions <- grep("Functions", vv) if(length(Functions)>0 & length(endtext)>0){ startfunctions <- max(Functions)+1 endfunctions <- max(endtext)-1 allf <- v[max(Functions+1): max(endtext-1)] # check source functions works (need better escape) Rallf <- Rcommand(allf) cat(Rallf, file="functions", sep="\n") source("functions", echo=F, max.deparse.length=100000) print(" functions sourced") } #} sD <- 2 eD <- length(v)-1 # red marked lines used select <- grep("\\\\cf6", v) if(length(select) >1){ sD <- min(select) eD <- max(select) cat("lines selected between red comments", sD, eD, "\n") } # or if no red lines the whole thing if(length(select) <= 1){ nulls <- which(vv=="") PrintQ(nulls, "nulls") wdnull <- which(diff(nulls) > 1) if(length(wdnull)==0) return(" No script found") sD <- min(wdnull) + 1 eD <- endtext - 1 PrintQ(c(sD, eD), "all lines selected") } # get rtf before, during, and after selected range u1 <- v[1:(sD-1)] # fix u1 due to blanks acquired when command first u1 <- u1[u1!="\\par }{"] u2 <-v[sD:eD] u3 <-v[(eD+1):length(v)] PrintQ(ncurlyQ(u1), " u1 curlies") PrintQ(ncurlyQ(u2), " u2 curlies") PrintQ(ncurlyQ(u3), " u3 curlies") PrintQ(u2, " Selected pars") # fix first u2 to handle case when command is first # eliminate previous fix, to prevent many start blanks PrintQ(u2, " before correcting first entry") PrintQ(v[-c(1,length(v))], " before correcting u21") u2[1] <- sub("\\\\par \\}\\{", "", u2[1]) u2[1] <- sub("\\{\\\\rtlch", "\\{\\\\par \\}\\{\\\\rtlch", u2[1]) PrintQ(u2, " after correcting first entry") PrintQ(v[-c(1,length(v))], " after correcting u21") # split u2 on \\par } u2 <- SplitQ(u2, "\\\\par \\}", "\\par }") PrintQ(u2, "after splitting on \\par }") PrintQ(ncurlyQ(u2), " curlies") # next few commands eliminate output and adjust formatting lines # inserted after output, so that such lines can be readded later # eliminating output with \\cf2 u2 <- u2[!grepl("\\\\cf2", u2)] # remove comment format lines after output and prompt, unless they contain something commentf <- "insrsid11235074" wherecf <- grepl(commentf, u2) empty <- UnwrapQ(u2) == "" & wherecf u2 <- u2[!empty] PrintQ(u2, " after output removal ") PrintQ(ncurlyQ(u2), " curlies") # split on \par and restore u2 <- paste(u2, collapse="") u2 <- unlist( strsplit(u2, "\\\\par ")) u2[-1] <- paste("\\par ", u2[-1], sep="") PrintQ(u2, " after splitting on par") PrintQ(ncurlyQ(u2), " curlies") # eliminate padding line after prompt and output( carefully) end <- "\\par }{\\pard \\rtlch\\fcs1 \\fi0\\li0\\af39\\afs28 \\ltrch\\fcs0 \\f39\\fs28\\insrsid11235074" endb <- "\\par }{\\pard \\rtlch\\fcs1 \\fi0\\li0\\af39\\afs28 \\ltrch\\fcs0 \\f39\\fs28\\insrsid11235074 " endbb <- "\\par }{\\pard \\rtlch\\fcs1 \\fi0\\li0\\af39\\afs28 \\ltrch\\fcs0 \\f39\\fs28\\insrsid11235074 " endp <- "\\par }{\\rtlch\\fcs1 \\af39\\afs28 \\ltrch\\fcs0 \\f39\\fs28\\insrsid11235074" # fix padding line removal for in command blank lines u2 <- u2[!u2 %in% c(end,endb,endbb,endp)] PrintQ(u2, " after padding line removal ") PrintQ(ncurlyQ(u2), " curlies") # remove INCLUDEPICTURE lines, trouble because broken up u2 <- u2[!grepl("INCLU|DEPICT|CTURE", u2)] PrintQ(u2, " after removing include picture ") PrintQ(ncurlyQ(u2), " curlies") # identify format lines with \\rtlch or \\pard format <- grepl("\\\\rtlch|\\\\pard", u2) PrintQ(u2[format], " format lines") # identify command lines as courier only or dev.off command <- grepl("\\\\f2", u2) & !grepl("\\\\f3", u2) & !grepl("\\\\f1", u2) & !grepl("\\\\f0", u2) command <- command | grepl("dev.off", u2) # set unformatted line to previous line command status for ( line in 2:length(u2)) if( !format[line] ) command[line] <- command[line-1] | command[line] PrintQ(u2[command], "command lines") # substitute ## in command lines whh <- grepl("##", u2[command]) u2[command][whh] <- sub("##", "#X", u2[command][whh]) # now set formatted lines with ## wh <- !command PrintQ(which(wh), " hashlines ") if (sum(wh) > 0) u2[wh] <- paste("##", u2[wh], sep="") # identify command lines v <- u2[command] u2[command] <- Rcommand(v) PrintQ(u2, "R code identified") return(list(u1=u1, u2=u2, u3=u3)) } ################################################################# addPicture <- function(f){ # adds a picture function after each picture call in file f # scan file f fs <- f PrintQ(fs, "before addpicture") # identify png , tiff, jpegdev.off png <- grep("png\\(|tiff\\(|jpeg\\)", fs) devoff <- grep("dev.off\\(\\)", fs) if(length(png) > 0) png <- png[!grepl("#", fs[png])] if(length(devoff) > 0) devoff <- devoff[!grepl("#", fs[devoff])] if (length(png) != length(devoff)){ print(png) print(fs[png]) print(devoff) print(fs[devoff-1]) return( "png|tiff|jpeg and dev.off requests dont match") } if(length(png) > 0){ if(sum(png >= devoff) > 0) return(" devoff before png") # pick off contents of quotes pic <- regmatches(fs[png], regexpr('".*"', fs[png])) fs[devoff] <- paste("dev.off(); picture(", pic, ")", sep="") } PrintQ(fs, "after addPicture") return(fs) } ################################################################## picture <- function(a){ return( cat( paste("\\par }{\\field\\fldedit{\\*\\fldinst{ INCLUDEPICTURE \\\\d", "\"", a, "\"", "\\\\* MERGEFORMATINET }}{\\fldrslt{}}}{\\par }{\\pard \\rtlch\\fcs1 \\fi0\\li0\\af38\\afs28 \\ltrch\\fcs0 \\f39\\fs28\\insrsid11235074", sep=""), "\n") ) } ################################################################ logtortf <- function(f){ # converts log commands, comments, output to rtf u <- ScanQ(f) PrintQ(u, " log before conversion to rtf") # use escape characters for {}\\t\\n, except IP or ## IP <- !grepl("INCLUDE", u) u[!IP] <- paste("> ##", u[!IP], sep="") IH <- !grepl("##", u) IPH <- IP & IH u[IPH] <- gsub("\\{", "\\\\'7b", u[IPH]) u[IPH] <- gsub("\\}", "\\\\'7d", u[IPH]) u[IPH] <- gsub("\\\\n", "\\\\'5cn", u[IPH]) u[IPH] <- gsub("\\\\t", "\\\\'5ct", u[IPH]) # allow for picture by itself pic <- grepl("picture", u) &! grepl("dev.off()", u) IPP <- !IP & !c(T, pic[-length(u)]) nIP <- which(IPP) PrintQ(pic); PrintQ(IPP); PrintQ(nIP) # get rid of two def.off return lines before INCLUDE PrintQ(u, " before fix include picture") if(length(nIP) > 0) u <- u[-c(nIP-1, nIP-2)] PrintQ(u, " after fix include picture") # out, hash, and prompts fixed u <- outRTF(u) PrintQ(u, "out") u <- hashRTF(u) PrintQ(u, "hash") u <- promptRTF(u) PrintQ(u, "prompt") return(u) } ################################################################# outRTF <- function(fs){ # convert non-prompt blocks to rtf code, except INCLUDE prompts <- grep("> |\\+ ", substr(fs, 1, 2)) promptb <- c(prompts, length(fs)+2) if (length(prompts) > 0){ # pick off end and begin of prompts eq <- prompts[diff(promptb) > 1] sq <- promptb[-1][diff(promptb) > 1] sq <- c(prompts[1], sq[-length(sq)]) if(max(eq) < length(fs)) sq <-c(sq, length(fs)+1) if(length(sq) > 1){ for ( i in 1:(length(sq)-1) ){ use <- (eq[i]+1) : (sq[i+1]-1) luse <- length(use) if ( sum(grep("INCLUDE", fs[use]))==0 ){ # remove the horrible [1] if(sum(use)==1) fs[use] <- gsub("\\[1\\]", " ", fs[use]) # allow for \ character fs[use] <- gsub("\\\\", "\\\\'5c", fs[use]) first <-"\\par }\\pard \\ltrpar\\ql \\f2\\fi0\\li0\\ri0\\sl276\\slmult1\\widctlpar\\wrapdefault\\aspalpha\\aspnum\\faauto\\adjustright\\itap0\\pararsid8401699 {\\rtlch\\fcs1 \\af0 \\ltrch\\fcs0 \\b\\f2\\fs24\\cf2\\insrsid8401699" fs[use[1]] <- paste(first, fs[use[1]], sep="") if(length(use) > 1) fs[use[-1]] <- paste("\\line ", fs[use[-1]], sep="") # add comment format to last line end <- "\\par }{\\pard \\rtlch\\fcs1 \\fi0\\li0\\af39\\afs28 \\ltrch\\fcs0 \\f39\\fs28\\insrsid11235074" fs[use[length(use)]] <- paste(fs[use[length(use)]], end, sep="") } } } } return(fs) } ################################################################## hashRTF <- function(fs){ # convert hash blocks to rtf code hash <- grep("> ##|\\+ ##", fs) fs[hash] <- substr(fs[hash], 5, nchar(fs[hash])) return(fs) } ################################################################## promptRTF <- function(fs){ # makes an rtf paragraph for each prompt line except INCLUDE. #restore spaces prompts <- grepl(">|\\+", substr(fs, 1, 1)) prompts <- prompts & !grepl("INCLUDE", fs) fs[prompts][fs[prompts] =="> #" ] <- "> " fs[prompts][fs[prompts] =="+ #" ] <- "+ " PrintQ(fs[prompts], " after hash sub") fs[prompts] <- substr(fs[prompts ], 3, nchar(fs[prompts])) # restore dev.off() line use <- grepl("dev.off", fs[prompts]) fs[prompts][use] <- "dev.off()" # restore curlies and \n \t fs[prompts] <- gsub("\\\\'7b", "\\\\{", fs[prompts]) fs[prompts] <- gsub("\\\\'7d", "\\\\}", fs[prompts]) fs[prompts] <- gsub("\\\\'5cn", "\\\\\\\\n", fs[prompts]) fs[prompts] <- gsub("\\\\'5ct", "\\\\\\\\t", fs[prompts]) PrintQ(fs[prompts]) # resubstitute ## beginning prompt lines whh <- grepl("#X", fs[prompts]) fs[prompts][whh] <- sub("#X", "##", fs[prompts][whh]) h <- prompts & ! c(F, prompts[-length(fs)]) # remove first space from first line of command paragraph space <- substring(fs[h], 1, 1) == " " fs[h][space] <- substr(fs[h][space], 2, nchar(fs[h][space])) # add command format to first line of command paragraph fs[h] <- paste("\\par }{\\pard \\rtlch \\b\\fs28\\f2\\fi0\\li0\\ri0\\fcs1 \\b \\af2 \\afs28 \\cf1\\fs28\\f2 \\ltrch ", fs[h], sep="") # non headers fs[!h & prompts] <- paste("\\par \\hich\\af2\\dbch\\af31505\\loch\\f2 ", fs[!h & prompts], sep="") # add comment format to last line of command paragraph last <- prompts & !c(prompts[-1], F) end <- "\\par }{\\pard \\rtlch\\fcs1 \\fi0\\li0\\af39\\afs28 \\ltrch\\fcs0 \\f39\\fs28\\insrsid11235074" fs[last] <- paste(fs[last], end, sep="") PrintQ(fs[prompts], "clad prompts") return(fs) } ################################################################# Rcommand <- function(v){ # one R command expected in each element of vector v PrintQ(v, "before spacing") # replace space by # v[v=="\\par "] <- "\\par #" PrintQ(v, "handling spaces with hash") # remove non R paragraph controllers, at least two characters v <- gsub("\\\\[a-zA-Z0-9-]{2,}[ ]?", "", v) v <- gsub("\\}\\{", "", v) # use escape sequences for {, } and \n \t v <- gsub("\\\\\\{", "BLC", v) v <- gsub("\\\\\\}", "BRC", v) v <- gsub("\\\\n", "BN", v) v <- gsub("\\\\t", "BT", v) # remove remaining curlies and controllers v <- gsub("\\\\i|\\\\p|\\\\b|\\\\v", "", v) v <- gsub("\\{|\\}", "", v) # restore escaped curlies and \n v <- gsub("BLC", "\\{", v) v <- gsub("BRC", "\\}", v) v <- gsub("BN", "\\n", v) v <- gsub("BT", "\\t", v) # remove an initial space v <- gsub("^ ", "", v) PrintQ(v, "initial space removed") # replace space by # v[v==""] <- "#" PrintQ(v, "R commands") return(v) } ################################################################ KillwordQ <- function(){ # exits all word files sideways # but destroys some of them, so a little dangerous for( i in 1:1000){ u <- system('tasklist /v /FI "IMAGENAME eq winword.exe"', intern=T) if( sum(grepl("INFO", u)) > 0) break if(i==1) print(" Word file will be closed, with recovery") ID <- substr(u[4], 31, 34) system(paste("taskkill /PID", ID, "/F")) } invisible() } ################################################################### DowordQ <- function(file){ # opens a word file, sometimes with nasty side effects system(paste( '"C:/Program Files (x86)/Microsoft Office/Office14/WINWORD.EXE"', file)) invisible() } ################################################################ UnwrapQ <- function(x){ x <- gsub("\\\\[a-zA-Z0-9-]{2,}[ ]?|\\{|\\}", "", x) x <- gsub('".*"', "", x) # vanish expressions cotaining \\* x[grep("\\\\[*]", x)] <- "" x <- gsub("\\\\i|\\\\p|\\\\b|\\\\v", "", x) # vanish spaces at ends of line for(i in 1:50) x <- gsub("^ | $", "", x) return(x) } ################################################################ ScanQ <- function(u) scan(u, quiet=TRUE, what="", sep="\n") ################################################################ SplitQ <- function(x, splitter, restore){ # split x at splitter, restore splitter to front of each element x <- paste(x, collapse="") sx <- unlist( strsplit(x, splitter) ) if(length(sx) > 1){ # paste back splitting expressions sx[-1] <- paste(restore, sx[-1], sep="") } return(sx) } ################################################################ PrintQ <- function(x, a="") { if(FALSE){ print(a) cat(deparse(substitute(x))) for( i in 1:(1+(length(x) %/% 30)) ){ print(x[ (1 + (i-1)*30): min( i*30, length(x)) ]) readline() } } } ################################################################ ncurlyQ <- function(x){ x <- paste(x, collapse="") lc <- length(gregexpr("\\{",x)[[1]] ) rc <- length(gregexpr("\\}",x)[[1]] ) return(list(lc=lc,rc=rc)) }