# Date: April 25, 2013
# Author: Shivani Rao (raoshivani@gmail.com)
# Description: This extracts the original source files for the files that are
#   parsed and cleaned in moreBugs.
# Requirements: In order to run this code you need to have R installed on your
#   computer and need to have moreBugs and the original git repository cloned.
#   You can use the steps given below to do this:
#
# Step 1: Clone the git repository of AspectJ
#   $ git clone https://github.com/eclipse/org.aspectj.git
#
# Step 2: Download moreBugs from
#   https://engineering.purdue.edu/RVL/Database/moreBugs/
#   into a folder called moreBugs
#   moreBugs/AspectJ$ ls
#   bugs metaInfo revisions tags
#
# Step 3: Use this tool to extract original source files
#   Rscript extract_original_sourceFiles.r <basedir> <srcDir> <destDir> \
#     <type> <value>
#   where <type> is one of "revision", "bug" or "tag".
#   Example using AspectJ revision number 4154:
#   Rscript extract_original_sourceFiles.r moreBugs/AspectJ org.aspectj \
#     originalSources revision 4154

# Copy files from one directory tree into another, keeping relative paths.
#
# Args:
#   srcDir:  directory the relative paths in `srcFile` are resolved against.
#   destDir: directory the files are copied into; missing sub-directories
#            are created.
#   srcFile: character vector of relative file paths (leading whitespace is
#            stripped). May be empty, in which case nothing is copied.
#
# Returns: NULL, invisibly. Existing destination files are not overwritten
#   (file.copy default).
CopyFile <- function(srcDir, destDir, srcFile) {
  srcFile <- sub("^\\s+", "", srcFile)
  # Iterating over the values (not 1:length) makes empty input a no-op.
  for (rel_path in srcFile) {
    target <- file.path(destDir, rel_path)
    target_dir <- dirname(target)
    if (!file.exists(target_dir)) {
      dir.create(target_dir, recursive = TRUE)
    }
    file.copy(file.path(srcDir, rel_path), target)
  }
  invisible(NULL)
}

# Check out `commitid` in the git clone at `srcDir`.
# Returns TRUE when the shell command exits with status 0.
.CheckoutCommit <- function(srcDir, commitid) {
  command <- paste0(
    "cd ", shQuote(path.expand(srcDir)),
    " && git checkout ", shQuote(commitid)
  )
  system(command) == 0
}

# Recursively copy the working tree at `srcDir` to `destDir` with `cp -r`.
.CopySnapshot <- function(srcDir, destDir) {
  system(paste(
    "cp -r",
    shQuote(path.expand(srcDir)),
    shQuote(path.expand(destDir))
  ))
}

# Extract original source files from a git clone using moreBugs metadata.
#
# Args:
#   basedir: moreBugs directory of one project; must contain `metaInfo/`.
#   srcDir:  git clone of the project; must contain `.git`.
#   destDir: output directory (created when missing).
#   type:    "revision" (copy the files listed by `git diff --name-status`
#            between this revision's commit and the next entry of
#            commit_revs.txt), "bug" (copy the whole tree at the bug's prefix
#            commit) or "tag" (copy the whole tree at the tag's commit).
#   value:   revision number, bug id, or tag name, matching `type`.
#
# Returns: NULL, invisibly. Problems are reported with print() and end the
#   function early.
#
# Side effects: changes the checked-out commit of the clone at `srcDir`,
#   writes into `destDir` and, for "revision", writes
#   `destDir/changelist.txt`.
ExtractOriginalSourceFiles <- function(basedir, srcDir, destDir, type, value) {
  if (!file.exists(basedir)) {
    # Download moreBugs from the link given in the file header and pass the
    # directory of the software you want to work with as basedir.
    print("looks like you have not downloaded moreBugs... exiting")
    return(invisible(NULL))
  }
  if (!file.exists(file.path(srcDir, ".git"))) {
    print("sorry you need clone your git first. This source directory you provided does not have a .git directory.. \nexiting")
    return(invisible(NULL))
  }
  if (!(type %in% c("revision", "bug", "tag"))) {
    print("sorry wrong input... exiting")
    return(invisible(NULL))
  }
  if (!file.exists(destDir)) {
    print("destination directory was not found, we created one")
    dir.create(destDir, recursive = TRUE)
  }

  meta_file <- function(name) file.path(basedir, "metaInfo", name)
  commitid <- NULL

  if (type == "revision") {
    commit_revs <- scan(
      file = meta_file("commit_revs.txt"),
      what = list(commitid = "", revID = 0)
    )
    rev_idx <- which(commit_revs$revID == value)
    if (length(rev_idx) == 0) {
      print("wrong revision ID we could not find it")
      return(invisible(NULL))
    }
    rev_idx <- rev_idx[1]
    if (rev_idx >= length(commit_revs$commitid)) {
      # The diff needs the entry that follows this one in commit_revs.txt.
      print("no following commit is listed for this revision... exiting")
      return(invisible(NULL))
    }
    commitid <- commit_revs$commitid[rev_idx]
    if (!.CheckoutCommit(srcDir, commitid)) {
      print("git checkout failed... exiting")
      return(invisible(NULL))
    }

    # NOTE(review): the diff runs from this revision's commit to the NEXT
    # entry of commit_revs.txt while this revision's commit is checked out;
    # whether "next" is the parent or the child depends on the ordering of
    # that file -- confirm the A/M/C statuses mean what is intended.
    commit_range <- paste0(commitid, "..", commit_revs$commitid[rev_idx + 1])
    diff_command <- paste0(
      "cd ", shQuote(path.expand(srcDir)),
      " && git diff --name-status -C ", shQuote(commit_range)
    )
    # Capture the output in R instead of redirecting in the shell: a relative
    # destDir would otherwise be resolved after the `cd`, and appending to an
    # old changelist would mix in entries from earlier runs.
    change_info <- system(diff_command, intern = TRUE)
    writeLines(change_info, file.path(destDir, "changelist.txt"))
    print("You may now copy the files specified in changelist.txt into your destination folder")

    change_info <- gsub(" $", "", change_info)
    print(length(change_info))

    # Drop hidden files (basename starting with ".") and CVS bookkeeping.
    changed_paths <- gsub("^[AMD]\t", "", change_info)
    keep <- !grepl("^\\.", basename(changed_paths)) &
      !grepl("CVS", changed_paths)
    change_info <- change_info[keep]

    # Only added / modified / copied entries are copied.
    selected <- change_info[grepl("^[AMC]", change_info)]
    if (length(selected) > 0) {
      srcFile <- gsub("^[AMC]\t", "", selected)
      print(srcFile)
      CopyFile(srcDir, destDir, srcFile)
    }
    return(invisible(NULL))
  }

  if (type == "bug") {
    bug_prefix <- scan(
      file = meta_file("bug_prefix.txt"),
      what = list(bugID = 0, commitid = "", revID = 0)
    )
    bug_idx <- which(bug_prefix$bugID == value)
    if (length(bug_idx) == 0) {
      print("wrong bugID we could not find it")
      return(invisible(NULL))
    }
    commitid <- bug_prefix$commitid[bug_idx[1]]
  } else {
    # type == "tag": tags_revs.txt only maps tag name -> revision number, so
    # the commit id has to be looked up through commit_revs.txt.
    tags_revs <- scan(
      file = meta_file("tags_revs.txt"),
      what = list(revID = 0, tagName = "")
    )
    commit_revs <- scan(
      file = meta_file("commit_revs.txt"),
      what = list(commitid = "", revID = 0)
    )
    tag_idx <- which(tags_revs$tagName == value)
    if (length(tag_idx) == 0) {
      print("wrong tag name we could not find it")
      return(invisible(NULL))
    }
    commit_idx <- which(commit_revs$revID == tags_revs$revID[tag_idx[1]])
    if (length(commit_idx) == 0) {
      print("no commit is listed for the revision of this tag... exiting")
      return(invisible(NULL))
    }
    commitid <- commit_revs$commitid[commit_idx[1]]
  }

  if (!.CheckoutCommit(srcDir, commitid)) {
    print("git checkout failed... exiting")
    return(invisible(NULL))
  }
  .CopySnapshot(srcDir, destDir)
  invisible(NULL)
}

# ---- Command-line driver ----
# Arguments (in order):
#   1. basedir: the location where the metaInfo directory of the dataset can
#               be found
#   2. srcDir:  the location where .git can be found, i.e. the clone
#   3. destDir: the location where you want your original source files
#   4. type:    the kind of information you are seeking: revision, bug or tag
#   5. value:   the revision number, bug id or tag name
args <- commandArgs(trailingOnly = TRUE)
print(length(args))
usage <- paste(
  "Usage: Rscript extract_original_sourceFiles.r",
  "<basedir> <srcDir> <destDir> <revision|bug|tag> <value>"
)
if (length(args) == 0L) {
  # No arguments: only define the functions (e.g. when the file is sourced).
  message(usage)
} else if (length(args) < 5L) {
  stop("expected 5 arguments, got ", length(args), "\n", usage, call. = FALSE)
} else {
  basedir <- args[1]
  srcDir <- args[2]
  destDir <- args[3]
  type <- args[4]
  if (type == "revision" || type == "bug") {
    # revision numbers and bug ids are compared numerically
    value <- as.numeric(args[5])
  } else if (type == "tag") {
    # tag names are compared as strings
    value <- args[5]
  } else {
    print("sorry wrong input... exiting")
    quit(save = "no", status = 1)
  }
  ExtractOriginalSourceFiles(basedir, srcDir, destDir, type, value)
}