diff --git a/R/insert_breaks.R b/R/insert_breaks.R index 4b445ff..a1ff71b 100644 --- a/R/insert_breaks.R +++ b/R/insert_breaks.R @@ -130,14 +130,16 @@ insert_break <- function(level, if (ret_value$add_semantics) { # create fill creators <- setNames(Map(create_creators, - start = c("", "#", ".", "")), - c("get_title", "get_id", "get_class","get_attribute")) - fill <- create_fill(title = ret_value$text1, - id = ret_value$id, - classes = ret_value$classes, - attributes = ret_value$keyvaluepairs, - function_container = creators) - + start = c("", "", "", "")), + c("get_title", "get_id", "get_class","get_attribute")) + tempstring=ret_value$id + fill <- create_fill(id = ret_value$id, + classes = ret_value$classes, + title = ret_value$text1, + attributes = ret_value$keyvaluepairs, + json_ld = ret_value$json_ld, + function_container = creators) + } else { fill <- ret_value$text1 } @@ -149,7 +151,6 @@ insert_break <- function(level, options(strcode = op) level <- as.numeric(unlist(strsplit(ret_value$level, ""))[nchar(ret_value$level)]) - } else { fill <- "" anchor_in_sep <- FALSE @@ -158,9 +159,7 @@ insert_break <- function(level, ### set parameter depending on level start <- paste0(rep("#", level), collapse = "") break_char = give_breakchar(level) - sep = paste(rep(" ", 4 - level), collapse = "") - - + sep = paste(rep(" ", 8 - level), collapse = "") # ____________________________________________________________________________ @@ -266,6 +265,7 @@ help_create_title <- function(start = "##", # create a text that starts with start, adds sep and then spaces up to margin # too long texts will be truncated if (fill == "") return(NULL) + text <- paste0(start, sep, fill) extension <- paste0(rep(" ", @@ -284,8 +284,12 @@ help_create_title <- function(start = "##", give_breakchar <- function(level) { switch(as.character(level), "1" = "_", - "2" = ".", - "3" = ". 
") + "2" = "._", + "3" = ".._", + "4" = "..._", + "5" = "...._", + "6" = "....._", + "7" = "......_",) } ## ............................................................................ ## help insert ---- @@ -339,8 +343,13 @@ help_insert <- function(x, #' @importFrom stats setNames #' @keywords internal find_title <- function(level) { - choices_input <- paste("level", 1:3) - class_choices <- paste("class", 1:10) + #XJ Original codes: + #styles_input <- paste(c("Default","JSON-LD")) + choices_input <- paste("level", 1:7) + + class_choices <- paste(c("provone:Process","provone:InputPort","provone:OutputPort","provone:DataLink","provone:SeqCtrlLink", + "provone:Workflow","provone:User","provone:ProcessExec","provone:Data","provone:Collection", + "provone:Visualization","provone:Program","prov:Plan")) ui <- miniPage( miniContentPanel( fillCol( @@ -375,14 +384,16 @@ find_title <- function(level) { width = "320px", multiple = TRUE), selectizeInput("keyvaluepairs", width = "320px", - label = "key-value pairs", + label = "Manually input: property=value", choices = "", multiple = TRUE, options = list(create = TRUE, persist = FALSE, - createFilter = "^[a-zA-Z1-9]+\\s*=\\s*[a-zA-Z1-9]+$")) + createFilter = "^.+\\s*=\\s*.+$")), + checkboxInput("json_ld", "JSON-LD",width = "80px") ) ) - ,flex = c(0.8, 0.5, 3)) + ,flex = c(0.8, 0.5, 3) + ) ) ) @@ -394,7 +405,9 @@ find_title <- function(level) { id = input$pandoc_id, level = input$level, classes = input$classes, - keyvaluepairs = input$keyvaluepairs)) + keyvaluepairs = input$keyvaluepairs, + json_ld = input$json_ld + )) observeEvent(input$done, { stopApp(eval(listout)) diff --git a/R/sum_str.R b/R/sum_str.R index d3d3b71..f106e9f 100644 --- a/R/sum_str.R +++ b/R/sum_str.R @@ -39,8 +39,22 @@ #' contain a title or not. #' @param header A boolean value indicating whether a column header should #' indicate the name of the columns (line, level, section). +#' @param rdf A boolean value indicating whether a rdf file should be generated. 
+#' @param graph A boolean value indicating whether an RDF graph should be generated. +#' @param domain A boolean value indicating whether to use the user's working domain as +#' a prefix header in the generated RDF file. +#' @param baseURI A character string naming the URI for the user's working domain. +#' @param UserID A character string naming the user name in the working domain. +#' @param prefix A character string naming the abbreviation for the user's domain. +#' @param UserAL A boolean value indicating whether to use the default association +#' library. +#' @param fillAssociation A boolean value indicating whether to use a pair of default +#' associations to name unlisted associations in the user's default association library. +#' @param UserANM A boolean value indicating whether to use the default association list. +#' Only associations in this list can serve as a relationship in the output RDF graph. #' @param ... futher arguments to be passed from and to other methods, in #' particular \code{\link{list.files}} for reading in multiple files. + #' @details To create the summary, \code{sum_str} uses regular expressions. #' Hence it is crucial that the code separators and the separator titles #' match the regular expression pattern. We recommend inserting @@ -50,10 +64,10 @@ #' as well. #' \itemize{ #' \item A code separator is defined as a line that starts with n hashes, -#' followed by 4-n spaces where 0 < n < 4. This sequence is followed by one +#' followed by 8-n spaces where 0 < n < 8. This sequence is followed by one #' or more either \code{.} or \code{_}. #' \item A title associated with a code separator is defined as a line -#' that starts with n hashes, followed by 4-n spaces where 0 < n < 4. This +#' that starts with n hashes, followed by 8-n spaces where 0 < n < 8. This #' sequence is \emph{not} followed by \code{.} or \code{_}. #' } #' Lines that do not satisfy these requirements (e.g. 
do not start with #s, @@ -73,11 +87,20 @@ #' # the following separator is an example of a valid #' # separator and associated title #' -#' # __________________________________________________ -#' # this is a level 1 title #### +#' # __________________________________________________ +#' # this is a level 1 title #### #' ## . . . . . . . . . . . . . . . . . . . . . . . . . #' ## note that the title or the separator character (_, .) -#' ## always starts at indention 4. +#' ## always starts at indention 8. +#' # to create separators which is valid for generating rdf file, +#' # users should fill out at least title and class, and put the entities into +#' # correct levels to create associations automatically by function. +#' +#' # the following separator is an example of a valid +#' # separator and associated title for generating rdf file +#' +#' # ________________________________________________________________________ +#' # YourWorkflow {YourID provone:Workflow} #### #' #' \dontrun{ #' # Open a new .R file in RStudio, insert some code breaks @@ -103,6 +126,16 @@ sum_str <- function(path_in = getSourceEditorContext()$path, last_sep = FALSE, title = TRUE, header = TRUE, + rdf = FALSE, + graph=FALSE, + domain=FALSE, + baseURI="http://example.org/base/", + UserID="UserID", + prefix="user", + UserAL=FALSE, + fillAssociation=FALSE, + UserANM=FALSE, + HideAssociation=FALSE, ...) { ## ............................................................................ 
@@ -154,7 +187,17 @@ assert_number(granularity, lower = 1, upper = 3) granularity = granularity, last_sep = last_sep, title = title, - header = header) + header = header, + rdf=rdf, + graph=graph, + domain=domain, + baseURI=baseURI, + UserID=UserID, + prefix=prefix, + UserAL=UserAL, + fillAssociation=fillAssociation, + UserANM=UserANM, + HideAssociation=HideAssociation) }) if (dir_out == "" && !is.null(file_out) && file_out == "object") { @@ -193,7 +236,17 @@ sum_str_helper <- function(path_in, granularity, last_sep, title, - header) { + header, + rdf, + graph, + domain, + baseURI, + UserID, + prefix, + UserAL, + fillAssociation, + UserANM, + HideAssociation) { ## ............................................................................ ## argument interaction #### @@ -224,7 +277,7 @@ sum_str_helper <- function(path_in, ## get pattern lines <- readLines(con = path_in) - sub_pattern <- "^# [^ ]|^## [^ ]|^### [^ ]" + sub_pattern <- "^# [^ ]|^## [^ ]|^### [^ ]|^#### [^ ]|^##### [^ ]|^###### [^ ]|^####### [^ ]" cand <- grep(sub_pattern, lines, perl = FALSE) # extract candiates id if (length(cand) == 0) { return(warning("No line matching the required pattern", @@ -241,7 +294,7 @@ if (rm_break_anchors) { lvl <- nchar(gsub("^(#+)\\s.*$", "\\1", lines[hash_candid], perl = TRUE)) replacement <- vapply(lvl, function(x) help_create_break(start = paste0(rep("#", x), collapse = ""), break_char = give_breakchar(x), - sep = paste(rep(" ", 4 - x), collapse = ""), anchor_in_sep = FALSE), + sep = paste(rep(" ", 8 - x), collapse = ""), anchor_in_sep = FALSE), FUN.VALUE = character(1)) lines[hash_candid] <- replacement @@ -253,7 +306,7 @@ if (rm_break_anchors) { remove <- c() if (granularity < 3) { # if there are any lines to remove hashes <- (granularity + 1):3 - spaces <- 4 - hashes + spaces <- 8- hashes # this variable stores the indices of all lines that should be dropped. 
for (i in 1:length(hashes)) { @@ -266,7 +319,7 @@ if (rm_break_anchors) { ### remove last separator if (last_sep == FALSE) { hashes <- min(find_gran("down", lines = lines), granularity) - spaces <- 4 - hashes + spaces <- 8 - hashes sub_pattern <- paste0("^#{", hashes, "}\\s{", spaces, "}[\\._].*$") remove <- append(remove, grep(sub_pattern, lines, perl = TRUE)) } @@ -330,7 +383,583 @@ if (rm_break_anchors) { ## ............................................................................ ## output the pattern - if (dir_out == "" && file_out == "object") { + + if (rdf=="ttl"|graph){ # if users want to generate rdf file or rdf graph + # use system time to generate a base file + datetime <- format(Sys.time(), "%Y_%m_%d_%H_%M_%S") + fileformat=".ttl" #".txt" + outputfile2 <- paste("RDF_output_file_",datetime,fileformat,sep="") + write(lines,file=outputfile2) + + + templines=readLines(outputfile2) + print (templines) + lines_content=templines[4:length(templines)] + lines_split=strsplit(lines_content, " ") + print (lines_split) + + baseURI=baseURI + UserID=UserID + FullURI=paste0(baseURI,UserID,"/") + prefix=prefix + + schemalist=list() + # a schemas library to create headers by using keywords in content + schemas=c(xsd="@prefix xsd: .", + owl="@prefix owl: .", + dcterms="@prefix dcterms: .", + prov="@prefix prov: .", + wfms="@prefix wfms: .", + rdf="@prefix rdf: .", + provone="@prefix provone: .", + skos="@prefix skos: .", + yw="@prefix yw: .", + cwfo="@prefix cwfo: .", + cwf="@prefix cwf: .") + # Using ":" to find class + for (i in 1:length(lines_split)){ + schemalist[[i]]=grep(":",lines_split[[i]]) + } + tempcount0=0 + schemalist1=list() + for (i in 1:length(schemalist)){ + if (length(schemalist[[i]])>0){ + tempcount0=tempcount0+1 + schemalist1[[tempcount0]]=schemalist[[i]] + } + } + # delete the seperator line and get useful information only + tempcount0=0 + lines_split1=list() + for (i in 1:length(schemalist)){ + if (length(schemalist[[i]])>0){ + 
tempcount0=tempcount0+1 + lines_split1[[tempcount0]]=lines_split[[i]] + } + } + + lines_split=lines_split1 + schemalist=schemalist1 + lines_split1=schemalist + for (i in 1:length(lines_split)){ + tempcount0=0 + for (j in 1:length(lines_split[[i]])){ + if (nchar(lines_split[[i]][j])>0){ + tempcount0=tempcount0+1 + lines_split1[[i]][tempcount0]=lines_split[[i]][j] + } + } + } + # delete the symbols which is used in seperator line + for (i in 1:length(lines_split1)){ + schemalist[[i]]=grep(":",lines_split1[[i]]) + } + lines_split=lines_split1 + + infolist=lines_split + print ("this is infolist1") + print (infolist) + for (i in 1:length(infolist)){ + templevel=strsplit(infolist[[i]][1],"\\t") + infolist[[i]][1]=nchar(templevel[[1]][2]) + } + for (i in 1:length(infolist)){ + infolist[[i]][3]=gsub("\\{","",infolist[[i]][3]) + infolist[[i]][3]=gsub("\\#","",infolist[[i]][3]) + } + for (i in 1:length(infolist)){ + infolist[[i]][length(infolist[[i]])]=gsub("\\}","",infolist[[i]][length(infolist[[i]])]) + } + infolist1=infolist + + # delete JSON-LD style structure and only leave the useful information + for (i in grep("\":",infolist)){ + infolist[[i]][2]=gsub(",","",infolist1[[i]][4]) + infolist[[i]][3]=gsub(",","",infolist1[[i]][2]) + infolist[[i]][4]=gsub(",","",infolist1[[i]][3]) + } + + for (j in 1:length(infolist)){ + for (i in 1:length(strsplit(infolist[[j]], "\":"))){ + if (length(strsplit(infolist[[j]], "\":")[[i]])>1){ + infolist1[[j]][i]=strsplit(infolist[[j]], "\":")[[i]][2] + } + } + } + infolist=infolist1 + print ("this is infolist2") + print (infolist) + + schemahad=0 + lines_rdf=" @prefix rdfs: .\n" + count0=1 + + schemalist1=list() + for (i in 1:length(infolist)){ + schemalist1[[i]]=grep(":",infolist[[i]]) + } + schemalist=schemalist1 + + # add prefix + for (i in 1:length(schemalist)){ + for (j in 1:length(schemalist[[i]])){ + tempstr=infolist[[i]][(schemalist[[i]])[j]] + tempschemastr=gsub("\\.","",strsplit(tempstr,'\\:')[[1]][1]) + schemas[tempschemastr] + 
if (tempschemastr %in% schemahad) { + } + else{schemahad[count0]=tempschemastr + count0=count0+1} + } + } + # adding headers: + lines_rdf=paste0(lines_rdf," @prefix ",prefix,": ","<",FullURI,"> .\n") + for (i in 1:length(schemahad)){ + lines_rdf=paste(lines_rdf,schemas[schemahad[i]],"\n") + } + # creating a default association list + DefaultAssociationlist=paste0("AssociationName\n","provone:hasSubProcess\n","provone:sourcePToCL\n","provone:CLtoDestP\n", + "provone:hasInPort\n","provone:hasOutPort\n","provone:hasDefaultParam\n", + "provone:DLToInPort\n","provone:outPortToDL\n","provone:inPortToDL\n", + "provone:DLToOutPort\n","provone:wasAttributedTo\n","provone:wasDerivedFrom\n", + "provone:dataOnLink\n","provone:used\n","provone:wasGeneratedBy\n", + "provone:wasAssociatedWith\n","provone:wasInformedBy\n","provone:isPartOf\n", + "provone:hadMember\n","cwfo:hasOutData\n","cwfo:hasInData\n") + write(DefaultAssociationlist,file="DefaultAssociationNames.txt") + + Associationlist.df=read.table("DefaultAssociationNames.txt",sep=",",header=TRUE) + + # whether use default association list or not + if (UserANM==FALSE){ + Associationlist.df=read.table("DefaultAssociationNames.txt",sep=",",header=TRUE) + } + else if (UserANM==TRUE) { + Associationlist.df=read.table("AssociationNames.txt",sep=",",header=TRUE) + } + Associationlist=Associationlist.df$AssociationName + + # creating a default association library: + DefaultAL=paste0("ParentClass,","ChildClass,","Ways,","Property,","ReverseProperty\n", + "\"provone:Process\",","\"provone:Process\",","2,","\"provone:hasSubProcess\",","\"provone:wasDerivedFrom\"\n", + "\"provone:Process\",","\"provone:Data\",","2,","\"provone:wasDerivedFrom\",","\"provone:hasMember\"\n", + "\"provone:Process\",","\"provone:Visualization\",","2,","\"provone:wasDerivedFrom\",","\"provone:hasMember\"\n") + write(DefaultAL,file="DefaultAssociationLibrary.txt") + + 
nodesnames=nodesclasses=nodesfrom=nodesto=nodesproperty=parentclass=property=line_rdf_vector=title1=esci="" + templevel=parentlevel=parentindex=tempwordlist=0 + levelvector=rep(0,7) + + # get property of association by using parent entity class and child entity class automatically + for (j in 1:length(infolist)){ + AssociationNUM=firstmeet=trigger1=0 + line_rdf=classeswords="" + title0=infolist[[j]][2] + title1[j]=title0 + ID=infolist[[j]][3] + parentlevel=templevel + templevel=infolist[[j]][1] + tempclass=infolist[[j]][4] + # levelvector saves existing levels + if (infolist[[j]][1]==1){ + if (levelvector[1]==0){ + levelvector[1]=j + } + } + if (infolist[[j]][1]==2){ + if (levelvector[2]==0){ + levelvector[2]=j + } + } + if (infolist[[j]][1]==3){ + if (levelvector[3]==0){ + levelvector[3]=j + } + } + if (infolist[[j]][1]==4){ + if (levelvector[4]==0){ + levelvector[4]=j + } + } + if (infolist[[j]][1]==5){ + if (levelvector[5]==0){ + levelvector[5]=j + } + } + if (infolist[[j]][1]==6){ + if (levelvector[6]==0){ + levelvector[6]=j + } + } + if (infolist[[j]][1]==7){ + if (levelvector[7]==0){ + levelvector[7]=j + } + } + # replace existing levels when found a new one + if (as.numeric(parentlevel)!=0){ + if (as.numeric(templevel)>as.numeric(parentlevel)){ + parentindex=j-1 + parentclass=infolist[[j-1]][4] + } + else if (templevel==parentlevel){ + parentindex=levelvector[as.numeric(templevel)-1] + parentclass=infolist[[as.numeric(parentindex)]][4] + } + else { + levelvector[as.numeric(templevel)]=j + parentindex=levelvector[as.numeric(templevel)-1] + parentclass=infolist[[parentindex]][4] + } + } + # judge association: + if (UserAL==FALSE){ + AssociationsLib=read.table("DefaultAssociationLibrary.txt",sep=",",header=TRUE) + } + else if (UserAL==TRUE) { + AssociationsLib=read.table("AssociationLibrary.txt",sep=",",header=TRUE) + } + tempPwordlist=which(AssociationsLib$ParentClass==parentclass) + tempwordlist=which(AssociationsLib$ChildClass[tempPwordlist]==tempclass) + # 
whether the relationship between parent and child classes are in association library + if (length(tempwordlist)>1){ + AssociationNUM=tempPwordlist[min(tempwordlist)] + } + else if (length(tempwordlist)==1){ + AssociationNUM=tempPwordlist[tempwordlist] + } + # get association property and other information if it is in association library + if (AssociationNUM>0){ + property=as.character(AssociationsLib$Property[AssociationNUM]) + # two ways association + if (AssociationsLib$Ways[AssociationNUM]==2){ + nodesfrom=paste0(nodesfrom,infolist[[as.numeric(parentindex)]][2]," ") + nodesto=paste0(nodesto,infolist[[j]][2]," ") + nodesproperty=paste0(nodesproperty,property," ") + nodesfrom=paste0(nodesfrom,infolist[[j]][2]," ") + nodesto=paste0(nodesto,infolist[[as.numeric(parentindex)]][2]," ") + nodesproperty=paste0(nodesproperty,AssociationsLib$ReverseProperty," ") + } + # one way association + else if (AssociationsLib$Ways[AssociationNUM]==1){ + nodesfrom=paste0(nodesfrom,infolist[[as.numeric(parentindex)]][2]," ") + nodesto=paste0(nodesto,infolist[[j]][2]," ") + nodesproperty=paste0(nodesproperty,property," ") + } + + } + # if using default association to fill the nonexistent association in the library + else if ((fillAssociation==TRUE)&(as.numeric(parentlevel)!=0)){ + property="str:has" + nodesfrom=paste0(nodesfrom,infolist[[as.numeric(parentindex)]][2]," ") + nodesto=paste0(nodesto,infolist[[j]][2]," ") + nodesproperty=paste0(nodesproperty,property," ") + nodesfrom=paste0(nodesfrom,infolist[[j]][2]," ") + nodesto=paste0(nodesto,infolist[[as.numeric(parentindex)]][2]," ") + nodesproperty=paste0(nodesproperty,"str:belongTo"," ") + } + #print ("This is levelvector") + #print (levelvector) + #print ("This is from") + #print (nodesfrom) + #print ("This is to") + #print (nodesto) + #print ("This is property") + #print (nodesproperty) + # deal with other information besides title, id and class + for (i in 4:length(infolist[[j]])){ + tempword="" + tempentity="" + temp_line="" + 
if (i==4){ + tempword=infolist[[j]][4] + nodesnames=paste0(nodesnames,title0," ") + classeswords=paste0(tempword) + + entityname=paste0(prefix,":",ID) + title=paste0(entityname) + line_rdf=paste0("\n ",title," a ",tempword) + + # print content depends on whether it is the last one or not + if (i==length(infolist[[j]])){ + nodesclasses=paste0(nodesclasses,classeswords," ") + line_rdf=paste(line_rdf,";","\n") + title0=paste0("\"",title0,"\"") + # add title as rdfs:label in the output file + #line_rdf=paste(line_rdf,"\t","rdfs:label",title0,".","\n")#,".","\n") + } + else{ + line_rdf=paste(line_rdf) + } + + }# out of if i==4 + else { # i>4 + tempword=infolist[[j]][i] + # find manually input values + print ("This is j") + print (j) + print ("This is tempword (tempword=infolist[[j]][i])") + print (tempword) + print ("This is infolist[[j]]") + print (infolist[[j]]) + if (grepl("=",tempword)){ + print ("This is j get equal mark") + print (j) + # print founded class or classes in output file + firstmeet=firstmeet+1 + if (firstmeet==1){ + line_rdf=paste0(line_rdf,";","\n") + } + + tempwordlist=strsplit(tempword,"=") + # if the manually typed input is an association, add this relation into nodes data frame + print (tempwordlist[[1]][1]) + print (Associationlist) + print (tempwordlist[[1]][1] %in% Associationlist) + if (tempwordlist[[1]][1] %in% Associationlist) + { + trigger1=1 + nodesfrom=paste0(nodesfrom,title0," ") + nodesto=paste0(nodesto,tempwordlist[[1]][2]," ") + nodesproperty=paste0(nodesproperty,tempwordlist[[1]][1]," ") + } + else{ + temp_line=paste(tempwordlist[[1]][1],tempwordlist[[1]][2]) + } + if (i==length(infolist[[j]])){ + nodesclasses=paste0(nodesclasses,classeswords," ") + if (nchar(temp_line)>0){ + temp_line=paste("\t",temp_line,";","\n") + } + if (trigger1==0){ + esci=paste(esci,j) + #title0=paste0("\"",title0,"\"") + #temp_line=paste(temp_line,"\t","rdfs:label!!!",title0,".","\n")#,".","\n") + } + + } + else { + if (nchar(temp_line)>0){ + 
temp_line=paste("\t",temp_line,";","\n") + } + } + } + # for multiple classes, paste each one after first class, seperate by comma + else { + classeswords=paste0(classeswords,",",tempword) + line_rdf=paste0(line_rdf,", ",tempword) + if (i==length(infolist[[j]])){ + nodesclasses=paste0(nodesclasses,classeswords," ") + if (nchar(temp_line)>0){ + temp_line=paste("\t",temp_line,";","\n") + } + #title0=paste0("\"",title0,"\"") + temp_line=paste(temp_line,";\n")#,"\t","rdfs:label",title0,";","\n") + } + else { + } + } + } + line_rdf=paste(line_rdf,temp_line) + # save entities in a character sting variable + line_rdf_vector[j]=line_rdf + } +} +# get titles and IDs +titles=IDs=0 +for (i in 1:length(infolist)){ + titles[i]=infolist[[i]][2] + IDs[i]=infolist[[i]][3] +} + +library(igraph) + +nodesfrom2=strsplit(nodesfrom," ") +nodesto2=strsplit(nodesto," ") +nodesproperty2=strsplit(nodesproperty," ") +exceptnum=except=nodesfrom3=nodesto3=nodesproperty3=nodesnm=0 +exceptwords=c("str:has","str:belongTo") + +# use a diagonal matrix to find duplicate nodes when users manually typed in associations, and replace the +# default associations "str:has" and "str:belongTo" by user-defined associations +if ((length(nodesfrom2[[1]])-1)>=1){ +for (i in 1:(length(nodesfrom2[[1]])-1)){ + for (j in ((i+1):length(nodesfrom2[[1]]))){ + if ((i!=j)&(length(nodesfrom2[[1]])>=j)&(length(nodesto2[[1]])>=j)){ + if((nodesfrom2[[1]][i]==nodesfrom2[[1]][j])&(nodesto2[[1]][i]==nodesto2[[1]][j])){ + exceptnum=exceptnum+1 + if (nodesproperty2[[1]][i] %in% exceptwords){ + except[exceptnum]=i + } + else { except[exceptnum]=j} + } + } + } +} + } + +nodesnames2=strsplit(nodesnames," ") + +# add nonexisting nodes to graph, even if they are not inserted as an entity +for (i in 1:length(nodesfrom2[[1]])){ + if (i %in% except){} + else { + nodesnm=nodesnm+1 + for (j in 1:length(IDs)){ + if (nodesfrom2[[1]][i]==IDs[j]){ + nodesfrom2[[1]][i]=titles[j] + } + else if (nodesto2[[1]][i]==IDs[j]){ + 
nodesto2[[1]][i]=titles[j] + } + } + # add nonexisting nodes to graph, set class as AutoAdded + + if (nodesfrom2[[1]][i] %in% nodesnames2[[1]]){} + else { + nodesnames=paste0(nodesnames,nodesfrom2[[1]][i]," ") + nodesclasses=paste0(nodesclasses,"AutoAdded"," ") + } + if (nodesto2[[1]][i] %in% nodesnames2[[1]]){} + else { + nodesnames=paste0(nodesnames,nodesto2[[1]][i]," ") + nodesclasses=paste0(nodesclasses,"AutoAdded"," ") + } + nodesfrom3[nodesnm]=nodesfrom2[[1]][i] + nodesto3[nodesnm]=nodesto2[[1]][i] + nodesproperty3[nodesnm]=nodesproperty2[[1]][i] + } +} + + +nodesnames2=strsplit(nodesnames," ") +nodesclasses2=strsplit(nodesclasses," ") + +# add nodes and nesting information into a data frame +nodes <- data.frame(name = return_space(nodesnames2[[1]]), + class = nodesclasses2[[1]]) + +nesting <- data.frame(from = return_space(nodesfrom3), + to = return_space(nodesto3), + property = nodesproperty3) + +g3 <- graph_from_data_frame(nesting, directed=TRUE, vertices=nodes) +E(g3)$label <- E(g3)$property + +#print (nodes) +#print (nesting) +#print (esci) +# append manually type-in information to each entity +if (length(esci)>0){ +escj=strsplit(esci," ") +#print (escj) +for (i in 1:length(line_rdf_vector)){ + if (i %in% escj[[1]]){ + #print (i) + #line_rdf_vector[i]=paste(line_rdf_vector[i],"\t","rdfs:label","\"",title1[i],"\"",".","\n") + tempnumber=which(nodesfrom3==titles[i]) + if (length(tempnumber)>0){ + for (j in 1:length(tempnumber)){ + #print ("j loop") + #print (j) + if (length(IDs[which(titles==nodesto3[tempnumber[j]])])>0){ + entityname2=paste0(prefix,":",IDs[which(titles==nodesto3[tempnumber[j]])]) + } + else { + entityname2=paste0(prefix,":",nodesto3[tempnumber[j]]) + } + if (j==length(tempnumber)){ + #print ("j=l") + #print (j) + #print (tempnumber) + #print (i) + #print (line_rdf_vector[i]) + line_rdf_vector[i]=paste(line_rdf_vector[i],"\t",nodesproperty3[tempnumber[j]],entityname2,";","\n") + 
line_rdf_vector[i]=paste(line_rdf_vector[i],"\t","rdfs:label","\"",title1[i],"\"",".","\n") + } + else{ + #print ("j!=l") + #print (j) + #print (tempnumber) + #print (i) + #print (line_rdf_vector[i]) + line_rdf_vector[i]=paste(line_rdf_vector[i],"\t",nodesproperty3[tempnumber[j]],entityname2,";","\n") + } + } + } + else # !if (length(tempnumber)>0) + {line_rdf_vector[i]=paste(line_rdf_vector[i],"\t","rdfs:label","\"",title1[i],"\"",".","\n")} + + + + } + else{ + tempnumber=which(nodesfrom3==titles[i]) + if (length(tempnumber)>0){ + for (j in 1:length(tempnumber)){ + if (length(IDs[which(titles==nodesto3[tempnumber[j]])])>0){ + entityname2=paste0(prefix,":",IDs[which(titles==nodesto3[tempnumber[j]])]) + } + else { + entityname2=paste0(prefix,":",nodesto3[tempnumber[j]]) + } + if (j==length(tempnumber)){ + #print ("!") + #print (j) + #print (tempnumber) + #print (i) + #print (line_rdf_vector[i]) + line_rdf_vector[i]=paste(line_rdf_vector[i],"\t",nodesproperty3[tempnumber[j]],entityname2,";","\n") + line_rdf_vector[i]=paste(line_rdf_vector[i],"\t","rdfs:label","\"",title1[i],"\"",".","\n") + } + else{ + #print ("!") + #print (j) + #print (tempnumber) + #print (i) + #print (line_rdf_vector[i]) + line_rdf_vector[i]=paste(line_rdf_vector[i],"\t",nodesproperty3[tempnumber[j]],entityname2,";","\n") + } + } + } + else # !if (length(tempnumber)>0) + {line_rdf_vector[i]=paste(line_rdf_vector[i],"\t","rdfs:label","\"",title1[i],"\"",".","\n")} + }#else + +} + } + + +# convert %20 to space +for (i in 1:length(line_rdf_vector)){ + line_rdf_vector[i]=return_space(line_rdf_vector[i]) + lines_rdf=paste(lines_rdf,line_rdf_vector[i]) +} +# generate RDF file +if (rdf=="ttl"){ + write(lines_rdf,file=outputfile2) +print("Create a RDF file successfully. 
Please find the output file in:") +print(getwd()) +print(paste("Your file name is:",outputfile2)) +} +if (graph){ + if (HideAssociation==TRUE){ + nestinghide <- data.frame(from = nesting$from, + to = nesting$to, + property = rep("",nrow(nesting))) + + g3 <- graph_from_data_frame(nestinghide , directed=TRUE, vertices=nodes) + } + + +E(g3)$label <- E(g3)$property +print(g3, e=TRUE, v=TRUE) +plot(g3, edge.arrow.size=.2, edge.curved=.4) + } + + + } + # original below (delet else): + else if (dir_out == "" && file_out == "object") { lines } else { cat(lines, file = path_out, sep = "\n") diff --git a/R/utils.R b/R/utils.R index 86d7784..995c1b7 100644 --- a/R/utils.R +++ b/R/utils.R @@ -106,7 +106,11 @@ encl <- function(..., left = "{", right = "}") { #' @param x character vector to remove white space #' @keywords internal rm_space <- function(x) { - gsub(" ", "", x) + gsub(" ", "%20", x) +} + +return_space <- function(x) { + gsub("%20", " ", x) } @@ -136,14 +140,27 @@ create_fill <- function(title = "title", id = "fj3", classes, attributes, + json_ld = FALSE, function_container) { - with(function_container, paste( - get_title(title), + if (json_ld) { + with(function_container, paste( + + encl(paste0("\"@id\":",get_id(id),","), + paste0("\"@type\":",papply(classes, get_class),","), + paste0("\"label\":",rm_space(get_title(title))), + papply(rm_space(attributes), get_attribute)) + ) + ) + } else { + with(function_container, paste( + rm_space(get_title(title)), encl(get_id(id), papply(classes, get_class), papply(rm_space(attributes), get_attribute)) ) ) + } + } diff --git a/README.Rmd b/README.Rmd index fbd2055..db8163e 100644 --- a/README.Rmd +++ b/README.Rmd @@ -16,8 +16,24 @@ library(strcode) [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/strcode)](https://cran.r-project.org/package=strcode) [![packageversion](https://img.shields.io/badge/Package%20version-0.2.0-orange.svg?style=flat-square)](commits/master) +# Table of Contents +1. Introduction +2. 
Installation +3. Structuring Code + + Basic Code Structuring (no embedded semantics) + + Structuring Code with Embedded Semantics + + Anchoring Sections + + Inserting a code anchor +4. Summarizing Code + + Summarizing without Embedded Semantics + + Summarizing with Embedded Semantics +5. Improving Legibility through Code Structuring + + Improve Legibility without Embedded Semantics + + Improved Legibility with Embedded Semantics +# Introduction + The `strcode` (short for structuring code) package contains tools to organize and abstract your code better. It consists of @@ -28,7 +44,9 @@ and abstract your code better. It consists of experience further. - A function `sum_str` that summarizes the code structure based on the separators and their comments added with the Add-in. For one or more files, - it can cat the structure to the console or a file. + it can cat the structure to the console or a file. + - `sum_str` function can also structure code with embedded semantics, and generate an RDF file and RDF diagram automatically. + - Users can define the association library by themselves for `sum_str` which contains rules of automatically generated associations. - An [RStudio Add-in](https://rstudio.github.io/rstudioaddins/) that lets you insert a code anchor, that is, a hash sequence which can be used to uniquely identify a line in a large code base. @@ -45,32 +63,34 @@ knitr::include_graphics("https://raw.githubusercontent.com/lorenzwalthert/strcod You can install the package from GitHub. ```{r, eval = FALSE} # install.packages("devtools") -devtools::install_github("lorenzwalthert/strcode") +devtools::install_github("XiaoliangJiang/strcode",ref="semantics") ``` # Structuring code + +## Basic Code Structuring (no embedded semantics) We suggest three levels of granularity for code structuring, whereas higher-level blocks can contain lower-level blocks. 
-- level 1 sections, which are high-level blocks that can be +- **level 1** sections, which are high-level blocks that can be separated as follows: ```{r} -# ____________________________________________________________________________ -# A title #### +# ________________________________________________________________________ +# A title #### ``` -- level 2 sections, which are medium-level blocks that can be +- **level 2** sections, which are medium-level blocks that can be separated as follows: ```{r} -## ............................................................................ -## A subtitle #### +## ........................................................................ +## A subtitle #### ``` -- level 3 sections, which are low-level blocks that can be +- **level 3** sections, which are low-level blocks that can be separated as follows: ```{r} -### .. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . -### One more #### +### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... +### One more #### ``` @@ -96,6 +116,107 @@ By default, breaks and titles are inserted via a Shiny Gadget, but this default can be overridden by setting the option `strcode$insert_with_shiny` to `FALSE` and hence only inserting the break. +## Structuring Code with Embedded Semantics + +After invoking the interface, click check box `Add semantics` to show more options for semantic use. For complicated scripts up to seven levels of granularity might be required. Examples semantic annotation at different levels appear below. 
+ +- **Level 1** sections: the highest-level blocks that usually represent user's workflow domain and can be separated as follows: + +``` r +# ________________________________________________________________________ +# YourWorkflow {WorkID provone:Workflow} #### +``` + +- **Level 2** sections: second-highest-level blocks that are the highest level for detailed entities can be separated as follows: + +``` r +## ........................................................................ +## YourProcess {ProcessID provone:Process} #### +``` + +- **Level 3** sections: lower-level blocks that can be separated as follows: + +``` r +### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... +### YourProcess02 {ProcessID02 provone:Process} #### +``` + +- **Level 4** sections: lower-level blocks that can be separated as follows: + +``` r +#### ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... .... +#### YourData {DataID provone:Data} #### +``` +or +``` r +#### ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... .... +#### YourData {VisualizationID provone:Visualization} #### +``` + +Notice from these examples that: + +- The break characters `___`, `...`, `.. ..`,`... ...` were chosen such that they reflect the level of granularity, namely `___` means highest level (or domain level), and the number of dots between spaces represents different lower levels, for example, `.. ..` means second level of entities level (third level in total). + +- The first argument which is outside of the brace `{` is the *title*. The first argument in the pair of braces is *ID* and second argument is *class*. If you add more manual inputs, they will appear in the pair of braces after class, like: + +``` r +#### ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... .... +#### YourData {DataID provone:Data manuallyinput=value} #### +``` + +- Manually inputted values must follow the structure: `property=value`. 
The user interface can only accept manually input values following this structure! +- A structure following JSON-LD syntax also can be generated by the function by selecting `JSON-LD`. With same input above, the result of using JSON-LD style let users understand arguments easier and it is like: + +``` r +#### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... +#### {"@id":DataID, "@type":provone:Data, "label":YourData} #### +``` + +- **Since the summarization function uses code levels to automatically generate associations between entities, for semantics embedding a child entity must be 'under' its parent entity.** +- Notice that only given classes exist in `AssociationLibrary.txt` file (in `semantics` branch, `demos` folder) can serve as associations and automatically generated by the function. After downloading and pasting it into your current working directory --- use `getwd()` to show your working directory in R --- you could added new associations into this library by yourself. This file has five columns: `ParentClass` , `ChildClass`, `Ways`, `Property`, and `ReverseProperty`. You must follow this structure and add five values --- separated by commas `,` --- for your user-defined associations. The first value in `ParentClass` column is the class of parent entity; the second value in `ChildClass` is the class of child entity; the third value in `Ways` column can be only `1` or `2` which indicates this association is one-way or two-way; the fourth value in `Property` column represents the property of the association from parent entity to child entity, which also will be shown in RDF diagram; the last value in the `ReverseProperty` column represent the property in reverse way, which is from child entity to parent entity, and this value could be " " in onw-way associations. 
+ +- If a parent entity has multiple child entities, put these child entities under the parent entity level, and the function will generat correct relationship automatically. For example, if your inputs are as follows: + +``` r +# ________________________________________________________________________ +# YourWorkflow {WorkID provone:Workflow} #### + +## ........................................................................ +## YourProcess {ProcessID provone:Process} #### + +## ........................................................................ +## YourProcess02 {ProcessID02 provone:Process} #### +``` + +To insert semantics breaks, you can follow this: + + +A graphical representation of the RDF for this structure based on `igraph` looks like: + + + + +To generate this RDF graph, please make sure that you have already download and paste the "AssociationLibrary.txt" in your current working directory, and using this function `sum_str(graph=TRUE,UserAL=TRUE)` as follows: + + +- If a child entity has multiple parent entities, please manually input associations in this structure: associations=ID. for example, if your breaks are as follows: + +``` r +# ________________________________________________________________________ +# YourWorkflow {WorkID provone:Workflow provone:hasSubProcess=ProcessID02}#### + +## ........................................................................ +## YourProcess {ProcessID provone:Process} #### + +### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... +### YourProcess02 {ProcessID02 provone:Process provone:wasDerivedFrom=WorkID}#### +``` +The RDF graph now looks like : + + + +Notice that if you want to generate two-ways association, you need to manually added associations both in parent and child entities. In addition, only given words which exist in AssociationNames.txt file (in semantics branch, demos folder) can serve as associations between two entities and be manually added in. 
After downloading and pasting this file into your current working directory, you can add associations yourself. For more information about how to add associations to the list, please read the Summarizing code section.
The prefix and full URI will become prefix header of output RDF file as follows: ` @prefix cwf: .` +- `UserAL` indicates whether to use User-defined association library, and the default value is FALSE. With default value FALSE, the function will generate a .txt file in your current working directory named DefaultAssociationLibrary.txt. You can rename it as AssociationLibrary.txt to let it serve as your user-defined association library, and add new relations in it. +- `UserANM` indicates whether to use User-defined association names, and the default value is FALSE. With default value FALSE, the function will generate a .txt file in your current working directory named DefaultAssociationNames.txt. You can rename it as AssociationNames.txt to let it serve as your user-defined association names list, and add new associations in it. +-   `fillAssociation` indicates whether to use default associations `str:has` and `str:belongTo` to supplement vacancy association. The default value is TRUE. Since the function could only use associations which exist in associations, if set this value to FALSE and no association belongs to some given pairs of parent and child entities, the output file and RDF diagram may be incorrect. If you see `str:has` and `str:belongTo` in your output file or RDF diagram, you need to add that associations in AssociationLibrary.txt and regenerate your output file. + +You can find an example of how those arguments work in Example of improved legibility part. + +# Improving Legibility through Code Structuring +## Improve Legibility without Embedded Semantics -## Example of improved legibility To demonstrate the improvement in legibility, we give an extended example with some placeholder code. ```{r} @@ -321,3 +459,173 @@ test2 <- function(x) { ## ............................................................................ 
} ``` + +## Improved Legibility with Embedded Semantics + +By using input breaks as follows: +``` r +# ________________________________________________________________________ +# YourWorkflow {WorkID provone:Workflow} #### + +## ........................................................................ +## YourProcess {ProcessID provone:Process} #### + +### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... +### YourProcess02 {ProcessID02 provone:Process} #### +``` +And `sum_str` function with following arguments: +``` r +sum_str(rdf="ttl") +``` +You can get an out put file like this: +``` + @prefix user: . + @prefix provone: . + + user:WorkID a provone:Workflow ; + rdfs:label "YourWorkflow" ; + str:has user:ProcessID . + + user:ProcessID a provone:Process ; + rdfs:label "YourProcess" ; + str:belongTo user:WorkID ; + provone:hasSubProcess user:ProcessID02 . + + user:ProcessID02 a provone:Process ; + rdfs:label "YourProcess02" ; + provone:wasDerivedFrom user:ProcessID . +``` +Notice that `str:has` and `str:belongTo` are in this output file, which means no such association between a `provone:Workflow` and `provone:Process` exist in default associations library. You can rename the auto-created file DefaultAssociationLibrary.txt in your working directory and add a new line `"provone:Workflow","provone:Process",1,"provone:hasSubProcess"," "` in it. After that, save your file and if you run the function again with a new argument `UserAL=TRUE`, the output file will as follows: +``` + @prefix user: . + @prefix provone: . + + user:WorkID a provone:Workflow ; + rdfs:label "YourWorkflow" ; + provone:hasSubProcess user:ProcessID . + + user:ProcessID a provone:Process ; + rdfs:label "YourProcess" ; + provone:hasSubProcess user:ProcessID02 . + + user:ProcessID02 a provone:Process ; + rdfs:label "YourProcess02" ; + provone:wasDerivedFrom user:ProcessID . +``` +You may find that default associations are missing in this output file. 
+ +You can add another argument `graph=TRUE` into your function. With `sum_str(rdf="ttl", UserAL=TRUE, graph=TRUE)`, you can generate a RDF diagram as follows: + + + +If you want to add new associations into existing file, you need to check whether it exists in user-defined AssociationNames.txt file. For example, now we want to let `YourProcess02` be a sub-process of `YourWorkflow`. First, you need to check whether `provone:hasSubProcess` is in AssociationNames.txt. You can rename the DefaultAssociationNames.txt file which automatically generated in your current working directory as AssociationNames.txt. We could find this association in this file apparently. Then, you can add a new argument `UserANM=TRUE` into `sum_str` function, and use manually input bar in UI to type in `provone:hasSubProcess=ProcessID02` and regenerate YourWorkflow entity like this: +```r +# ________________________________________________________________________ +# YourWorkflow {WorkID provone:Workflow provone:hasSubProcess=ProcessID02}#### +``` +Notice that using ID as value here, but not title name. +With `sum_str(rdf="ttl", UserAL=TRUE, graph=TRUE, UserANM=TRUE)`, you can generate a new output file with `provone:hasSubProcess user:ProcessID02`, and a new RDF diagram as follows: + + + +If you want to define your domain, you need to add more arguments. For example, with `sum_str(rdf="ttl", UserAL=TRUE, graph=TRUE, UserANM=TRUE,prefix="test",baseURI = "http://testwebsite/",UserID ="testUser")`, you can generate a new output file as follows: +``` + @prefix test: . + @prefix provone: . + + test:WorkID a provone:Workflow ; + rdfs:label "YourWorkflow" ; + provone:hasSubProcess test:ProcessID02 ; + provone:hasSubProcess test:ProcessID . + + test:ProcessID a provone:Process ; + rdfs:label "YourProcess" ; + provone:hasSubProcess test:ProcessID02 . + + test:ProcessID02 a provone:Process ; + rdfs:label "YourProcess02" ; + provone:wasDerivedFrom test:ProcessID . 
+``` +Notice that all prefixes are set to test, and your user-defined domain appears in the first line. + +Now we could use a part of a real example to generate a sample output file. Notice that all `%20` below means space which automatically generated by the function if you type spaces in your title or manually input bar. They will return as spaces in output file and RDF diagram. +You can try to use these code as your input breaks: +``` r +# ________________________________________________________________________ +# CPP_Workflow {workflow_cpp provone:Workflow skos:altLabel="CPP%20Workflow:%20Workflow%20top%20level"}#### + +## ........................................................................ +## Recipe%20for%20CPP%20Data {gates_recipe prov:Plan skos:altLabel="Recipe%20document:%20Documentation%20on%20Recipe%20for%20CPP%20Data" rdfs:seeAlso= rdfs:seeAlso= cwfo:hasCode="MATLAB:%20TensorExplorationPARAFAC.m,%20YenerTensor.m"}#### + +## ........................................................................ +## Data%20Gathering {data_gathering provone:Program skos:altLabel="Workflow%20Phase%200"}#### + +### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... +### Load%20Test%20Dataset {load_test_dataset provone:Program skos:altLabel="ANTHAyenerfinalmine.xlsx" rdfs:comment="Loading%20of%20initial%20matrix%20(output%20of%20YenerTensor.m)"}#### + +#### ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... .... +#### Test%20Dataset%20(TENSOR) {test_dataset_data provone:Data rdfs:comment="Test%20Dataset" cwfo:hasCode="MATLAB:%20YenerTensor.m"}#### + +## ........................................................................ +## Data%20Processing {data_processing provone:Program skos:altLabel="Workflow%20Phase%201"}#### + +### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... 
+### Adjust%20APGAR%20Scores {adjust_apgar_scores provone:Program skos:altLabel="Ensure%20APGAR%20scores%20are%20between%200-10" rdfs:comment="MATLAB%20code:%20YenerTensor.m,%20Section%201" dcterms:conformsTo= cwfo:hasInData=test_dataset_data}#### + +#### ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... .... +#### Adjusted%20APGAR%20Data {adjusted_apgar_data provone:Data rdfs:comment="Adjusted%20APGAR%20Data"}#### + +### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... +### Choose%20Subjects {choose_subjects provone:Program skos:altLabel="Choose%20only%20subjects%20with%20all%20five%20time%20points" rdfs:comment="MATLAB%20code:%20YenerTensor.m,%20Section%201" dcterms:conformsTo= cwfo:hasInData=adjusted_apgar_data cwfo:hasOutData=growth_data}#### +``` +Then download `AssociationNames.txt` and `AssociationLibrary.txt` in `demos` forder, and paste them into your current working directory. With `sum_str(rdf="ttl",graph=TRUE,UserAL=FALSE,UserANM=FALSE,prefix="cwf",baseURI = "http://cwf.tw.rpi.edu/",UserID ="data#" )`, you could generate a output file as follows: +``` + @prefix test: . + @prefix provone: . + @prefix skos: . + @prefix prov: . + @prefix rdfs: . + @prefix cwfo: . + @prefix dcterms: . + + test:workflow_cpp a provone:Workflow ; + skos:altLabel "CPP Workflow: Workflow top level" ; + rdfs:label "CPP_Workflow" ; + prov:hadPlan test:gates_recipe ; + provone:hasSubProgram test:data_gathering ; + provone:hasSubProgram test:data_processing . + + test:gates_recipe a prov:Plan ; + skos:altLabel "Recipe document: Documentation on Recipe for CPP Data" ; + rdfs:seeAlso ; + rdfs:seeAlso ; + cwfo:hasCode "MATLAB: TensorExplorationPARAFAC.m, YenerTensor.m" ; + rdfs:label "Recipe for CPP Data" ; + + test:data_gathering a provone:Program ; + skos:altLabel "Workflow Phase 0" ; + rdfs:label "Data Gathering" ; + provone:hasSubProgram test:load_test_dataset . 
+ + test:load_test_dataset a provone:Program ; + skos:altLabel "ANTHAyenerfinalmine.xlsx" ; + rdfs:comment "Loading of initial matrix (output of YenerTensor.m)" ; + rdfs:label "Load Test Dataset" ; + cwfo:hasOutData test:test_dataset_data . + + test:test_dataset_data a provone:Data ; + rdfs:comment "Test Dataset" ; + cwfo:hasCode "MATLAB: YenerTensor.m" ; + rdfs:label "Test Dataset (TENSOR)" ; + + test:data_processing a provone:Program ; + skos:altLabel "Workflow Phase 1" ; + rdfs:label "Data Processing" ; + provone:hasSubProgram test:adjust_apgar_scores ; + provone:hasSubProgram test:choose_subjects . +``` + +The code generates a graph representation of the resulting RDF such as this: + + + diff --git a/README.md b/README.md index 66d684c..e71d94a 100644 --- a/README.md +++ b/README.md @@ -3,65 +3,188 @@ README [![Build Status](https://travis-ci.org/lorenzwalthert/strcode.svg?branch=master)](https://travis-ci.org/lorenzwalthert/strcode) [![Project Status: WIP ? Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](http://www.repostatus.org/badges/latest/wip.svg)](http://www.repostatus.org/#wip) [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/strcode)](https://cran.r-project.org/package=strcode) [![packageversion](https://img.shields.io/badge/Package%20version-0.2.0-orange.svg?style=flat-square)](commits/master) -The `strcode` (short for structuring code) package contains tools to organize and abstract your code better. It consists of - -- An [RStudio Add-in](https://rstudio.github.io/rstudioaddins/) that lets you quickly add code block separators and titles (possibly with unique identifiers) to divide your work into sections. The titles are recognized as sections by RStudio, which enhances the coding experience further. -- A function `sum_str` that summarizes the code structure based on the separators and their comments added with the Add-in. 
For one or more files, it can cat the structure to the console or a file. -- An [RStudio Add-in](https://rstudio.github.io/rstudioaddins/) that lets you insert a code anchor, that is, a hash sequence which can be used to uniquely identify a line in a large code base. +# Table of Contents +1. [Introduction](#intro) +2. [Installation](#install) +3. [Structuring Code](#structuring) + + Basic Code Structuring (no embedded semantics) + + [Structuring Code with Embedded Semantics](#structuringSem) + + Anchoring Sections + + Inserting a code anchor +4. [Summarizing Code](#summarizing) + + Summarizing without Embedded Semantics + + [Summarizing with Embedded Semantics](#summarizingSem) +5. [Improving Legibility through Code Structuring](#legibility) + + Improve Legibility without Embedded Semantics + + [Improved Legibility with Embedded Semantics](#legibilitySem) + + +# Introduction + +This special version of the `strcode` package (short for "structuring code") contains tools to help you organize and abstract your R scripts and to embed semantics according to the [ProvONE Data Model for Workflow Semantics](http://bit.ly/2eOUuAX). It consists of: + +- An [RStudio Add-in](https://rstudio.github.io/rstudioaddins/) that lets you quickly insert code block separators and titles (possibly with unique identifiers) to divide your work into sections. The titles are recognized as sections by RStudio, which further enhances the coding experience. +- A function `sum_str` that summarizes the code structure based on the separators and comments added with the Add-in. For one or more files, it can send the structure to the console or a file. + - **NEW**: The `sum_str` function now can structure code with **embedded semantics** and can generate RDF files (N3 format) and visual representations of the RDF graph automatically. + - To assist RDF generation, users can manually define the association library (relationships between entities of various classes). 
`sum_str` has built-in rules for automatic property generation for `ProvONE` entity types. +- An [RStudio Add-in](https://rstudio.github.io/rstudioaddins/) that lets you insert a code anchor, that is, a hash sequence which can be used to uniquely identify a line in a large code base. + +The animation below shows using `strcode` for basic code structuring. Embedding semantics is similar + -Installation -============ + +# Installation You can install the package from GitHub. ``` r # install.packages("devtools") -devtools::install_github("lorenzwalthert/strcode") +devtools::install_github("XiaoliangJiang/strcode",ref="semantics") ``` + +# Structuring Code -Structuring code -================ +## Basic Code Structuring (no embedded semantics) +We suggest three levels of granularity for 'basic' code structuring, whereas higher-level blocks can contain lower-level blocks. + +- **Level 1** sections: high-level blocks that can be separated as follows: + +``` r +# ________________________________________________________________________ +# A title #### +``` -We suggest three levels of granularity for code structuring, whereas higher-level blocks can contain lower-level blocks. +- **Level 2** sections: medium-level blocks that can be separated as follows: -- level 1 sections, which are high-level blocks that can be separated as follows: ``` r -# ____________________________________________________________________________ -# A title #### +## ........................................................................ +## A subtitle #### ``` -- level 2 sections, which are medium-level blocks that can be separated as follows: +- **Level 3** sections: low-level blocks that can be separated as follows: ``` r -## ............................................................................ -## A subtitle #### +### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... 
+### One more #### ``` -- level 3 sections, which are low-level blocks that can be separated as follows: +Notice from these examples that: + +- The number of `#` used in front of the break character (`___`, `...`, `.. ..`) corresponds to the level of granularity that is separated. +- The break characters `___`, `...`, `.. ..` were chosen such that they reflect the level of granularity, namely `___` has a much higher visual density than `.. ..`. +- Each block has an (optional) short title on what that block is about. +- Every title ends with `####`. Therefore, the titles are recognized by RStudio as [sections](https://support.rstudio.com/hc/en-us/articles/200484568-Code-Folding-and-Sections). This has the advantages that you can get a quick summary of your code in Rstudio's code pane and you can fold sections as you can fold code or function declarations or if statements. See the pictures below for details. + +*The separators each have a length of 80 characters*. The value is set by the global option `strcode$char_length` and can therefore be changed by the user. + +By default, breaks and titles are inserted via a `shiny` Gadget, but this default may be overridden by setting the option `strcode$insert_with_shiny` to `FALSE` and hence only the break is inserted. + + +## Structuring Code with Embedded Semantics + +After invoking the interface, click check box `Add semantics` to show more options for semantic use. For complicated scripts up to seven levels of granularity might be required. Examples semantic annotation at different levels appear below. + +- **Level 1** sections: the highest-level blocks that usually represent user's workflow domain and can be separated as follows: ``` r -### .. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
-### One more #### +# ________________________________________________________________________ +# YourWorkflow {WorkID provone:Workflow} #### +``` + +- **Level 2** sections: second-highest-level blocks that are the highest level for detailed entities can be separated as follows: + +``` r +## ........................................................................ +## YourProcess {ProcessID provone:Process} #### +``` + +- **Level 3** sections: lower-level blocks that can be separated as follows: + +``` r +### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... +### YourProcess02 {ProcessID02 provone:Process} #### +``` + +- **Level 4** sections: lower-level blocks that can be separated as follows: + +``` r +#### ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... .... +#### YourData {DataID provone:Data} #### ``` +or +``` r +#### ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... .... +#### YourData {VisualizationID provone:Visualization} #### +``` + +Notice from these examples that: -You can notice from the above that +- The break characters `___`, `...`, `.. ..`,`... ...` were chosen such that they reflect the level of granularity, namely `___` means highest level (or domain level), and the number of dots between spaces represents different lower levels, for example, `.. ..` means second level of entities level (third level in total). -- The number of `#` used in front of the break character (`___`, `...`, `. .`) corresponds to the level of granularity that is separated. -- The breaks characters `___`, `...`, `. .` were chosen such that they reflect the level of granularity, namely `___` has a much higher visual density than `. .`. -- Each block has an (optional) short title on what that block is about. -- Every title ends with `####`. Therefore, the titles are recognized by RStudio as [sections](https://support.rstudio.com/hc/en-us/articles/200484568-Code-Folding-and-Sections). 
This has the advantages that you can get a quick summary of your code in Rstudio's code pane and you can fold sections as you can fold code or function declarations or if statements. See the pictures below for details. +- The first argument which is outside of the brace `{` is the *title*. The first argument in the pair of braces is *ID* and second argument is *class*. If you add more manual inputs, they will appear in the pair of braces after class, like: + +``` r +#### ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... .... +#### YourData {DataID provone:Data manuallyinput=value} #### +``` + +- Manually inputted values must follow the structure: `property=value`. The user interface can only accept manually input values following this structure! +- A structure following JSON-LD syntax also can be generated by the function by selecting `JSON-LD`. With same input above, the result of using JSON-LD style let users understand arguments easier and it is like: + +``` r +#### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... +#### {"@id":DataID, "@type":provone:Data, "label":YourData} #### +``` + +- **Since the summarization function uses code levels to automatically generate associations between entities, for semantics embedding a child entity must be 'under' its parent entity.** +- Notice that only given classes exist in `AssociationLibrary.txt` file (in `semantics` branch, `demos` folder) can serve as associations and automatically generated by the function. After downloading and pasting it into your current working directory --- use `getwd()` to show your working directory in R --- you could added new associations into this library by yourself. This file has five columns: `ParentClass` , `ChildClass`, `Ways`, `Property`, and `ReverseProperty`. You must follow this structure and add five values --- separated by commas `,` --- for your user-defined associations. 
The first value in `ParentClass` column is the class of parent entity; the second value in `ChildClass` is the class of child entity; the third value in `Ways` column can be only `1` or `2` which indicates this association is one-way or two-way; the fourth value in `Property` column represents the property of the association from parent entity to child entity, which also will be shown in RDF diagram; the last value in the `ReverseProperty` column represent the property in reverse way, which is from child entity to parent entity, and this value could be " " in onw-way associations. + +- If a parent entity has multiple child entities, put these child entities under the parent entity level, and the function will generat correct relationship automatically. For example, if your inputs are as follows: + +``` r +# ________________________________________________________________________ +# YourWorkflow {WorkID provone:Workflow} #### + +## ........................................................................ +## YourProcess {ProcessID provone:Process} #### + +## ........................................................................ +## YourProcess02 {ProcessID02 provone:Process} #### +``` +To insert semantics breaks, you can follow this: + -The separators all have length 80. The value is looked up in the global option `strcode$char_length` and can therefore be changed by the user. +A graphical representation of the RDF for this structure based on `igraph` looks like: -By default, breaks and titles are inserted via a Shiny Gadget, but this default can be overridden by setting the option `strcode$insert_with_shiny` to `FALSE` and hence only inserting the break. 
+ -Anchoring sections -================== +To generate this RDF graph, please make sure that you have already download and paste the "AssociationLibrary.txt" in your current working directory, and using this function `sum_str(graph=TRUE,UserAL=TRUE)` as follows: + + +- If a child entity has multiple parent entities, please manually input associations in this structure: associations=ID. for example, if your breaks are as follows: + +``` r +# ________________________________________________________________________ +# YourWorkflow {WorkID provone:Workflow provone:hasSubProcess=ProcessID02}#### + +## ........................................................................ +## YourProcess {ProcessID provone:Process} #### + +### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... +### YourProcess02 {ProcessID02 provone:Process provone:wasDerivedFrom=WorkID}#### +``` +The RDF graph now looks like : + + + +Notice that if you want to generate two-ways association, you need to manually added associations both in parent and child entities. In addition, only given words which exist in AssociationNames.txt file (in semantics branch, demos folder) can serve as associations between two entities and be manually added in. After download and paste this file into your current working directory, you could add associations by yourself. For more information about how to add associations into the list, please read Summarizing code part. + +## Anchoring Sections Sometimes it is required to refer to a code section, which can be done by title. A better way, however, is to use a unique hash sequence - let us call it a code anchor - to create an arguably unique reference to that section. A code anchor in `strcode` is enclosed by `#<` and `>#` so all anchors can be found using regular expressions. You can add section breaks that include a hash. That might look like this: @@ -70,8 +193,7 @@ Sometimes it is required to refer to a code section, which can be done by title. 
## An anchored section #### ``` -Insert a code anchor -==================== +## Inserting a code anchor Code anchors might prove helpful in other situations where one want to anchor a single line. That is also possible with `strcode`. An example of a code anchor is the following: @@ -82,9 +204,10 @@ this_is_a_super_important_but_hard_to_describe_line_so_let_me_anchor_it The hash sequences in strcode are produced with the R package [digest](https://github.com/eddelbuettel/digest). -Summarizing code -================ + +# Summarizing Code +## Without Embedded Semantics Once code has been structured by adding sections (as above), it can easily be summarized or represented in a compact and abstract form. This is particularly handy when the codebase is large, when a lot of people work on the code or when new people join a project. The function `sum_str` is designed for the purpose of extracting separators and respective comments, in order to provide high level code summaries. It is highly customizable and flexible, with a host of options. Thanks to RStudio's API, you can even create summaries of the file you are working on, simply by typing `sum_str()` in the console. The file presented in the example section below can be summarized as follows: ``` r @@ -115,8 +238,24 @@ sum_str(path_in = "placeholder_code/example.R", - Similarly, we use `lowest_sep = FALSE` to indicate that we want lowest separators (given `granularity`) to be omitted between the titles of the sections. - `header` was set to `TRUE`, so the column names were reported as well. Note that they are slightly off since knitr uses a different tab length. In the R console and more imporantly in the outputed file, they are aliged. -Example of improved legibility ------------------------------- + +## Summarizing with Embedded Semantics +For semantic use, some new arguments are needed. +- `rdf` specifies a type of output rdf file. Only "ttl" has been added into function right now. 
+- `graph` indicates whether to generate an RDF graph or not. The default value is FALSE. +- `domain` indicates whether to use a user-defined working domain. If `domain=TRUE`, the baseURI, UserID and prefix values are needed. +- `baseURI` specifies a working URI, the default value is `"http://example.org/base/"`. +- `UserID` specifies a lower level of working URI, the default value is "UserID". Combined with baseURI, the full URI with default values will be `"http://example.org/base/UserID"`. +- `prefix` specifies an abbreviated name of the user domain. The default value is "user". The prefix and full URI will become the prefix header of the output RDF file as follows: ` @prefix cwf: .` +- `UserAL` indicates whether to use a user-defined association library, and the default value is FALSE. With default value FALSE, the function will generate a .txt file in your current working directory named DefaultAssociationLibrary.txt. You can rename it as AssociationLibrary.txt to let it serve as your user-defined association library, and add new relations in it. +- `UserANM` indicates whether to use user-defined association names, and the default value is FALSE. With default value FALSE, the function will generate a .txt file in your current working directory named DefaultAssociationNames.txt. You can rename it as AssociationNames.txt to let it serve as your user-defined association names list, and add new associations in it. +-   `fillAssociation` indicates whether to use the default associations `str:has` and `str:belongTo` to fill in missing associations. The default value is TRUE. Since the function can only use associations which exist in the association library, if you set this value to FALSE and no association belongs to some given pairs of parent and child entities, the output file and RDF diagram may be incorrect. If you see `str:has` and `str:belongTo` in your output file or RDF diagram, you need to add those associations in AssociationLibrary.txt and regenerate your output file. 
+ +You can find an example of how those arguments work in the Example of improved legibility part. + + +# Improving Legibility through Code Structuring +## Improve Legibility without Embedded Semantics To demonstrate the improvement in legibility, we give an extended example with some placeholder code. @@ -284,3 +423,173 @@ test2 <- function(x) { ## ............................................................................ } ``` + +## Improved Legibility with Embedded Semantics + +By using input breaks as follows: +``` r +# ________________________________________________________________________ +# YourWorkflow {WorkID provone:Workflow} #### + +## ........................................................................ +## YourProcess {ProcessID provone:Process} #### + +### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... +### YourProcess02 {ProcessID02 provone:Process} #### +``` +And the `sum_str` function with the following arguments: +``` r +sum_str(rdf="ttl") +``` +You can get an output file like this: +``` + @prefix user: . + @prefix provone: . + + user:WorkID a provone:Workflow ; + rdfs:label "YourWorkflow" ; + str:has user:ProcessID . + + user:ProcessID a provone:Process ; + rdfs:label "YourProcess" ; + str:belongTo user:WorkID ; + provone:hasSubProcess user:ProcessID02 . + + user:ProcessID02 a provone:Process ; + rdfs:label "YourProcess02" ; + provone:wasDerivedFrom user:ProcessID . +``` +Notice that `str:has` and `str:belongTo` are in this output file, which means no such association between a `provone:Workflow` and a `provone:Process` exists in the default association library. You can rename the auto-created file DefaultAssociationLibrary.txt in your working directory and add a new line `"provone:Workflow","provone:Process",1,"provone:hasSubProcess"," "` in it. After that, save your file and if you run the function again with a new argument `UserAL=TRUE`, the output file will be as follows: +``` + @prefix user: . + @prefix provone: . 
+ + user:WorkID a provone:Workflow ; + rdfs:label "YourWorkflow" ; + provone:hasSubProcess user:ProcessID . + + user:ProcessID a provone:Process ; + rdfs:label "YourProcess" ; + provone:hasSubProcess user:ProcessID02 . + + user:ProcessID02 a provone:Process ; + rdfs:label "YourProcess02" ; + provone:wasDerivedFrom user:ProcessID . +``` +You may find that default associations are missing in this output file. + +You can add another argument `graph=TRUE` into your function. With `sum_str(rdf="ttl", UserAL=TRUE, graph=TRUE)`, you can generate a RDF diagram as follows: + + + +If you want to add new associations into existing file, you need to check whether it exists in user-defined AssociationNames.txt file. For example, now we want to let `YourProcess02` be a sub-process of `YourWorkflow`. First, you need to check whether `provone:hasSubProcess` is in AssociationNames.txt. You can rename the DefaultAssociationNames.txt file which automatically generated in your current working directory as AssociationNames.txt. We could find this association in this file apparently. Then, you can add a new argument `UserANM=TRUE` into `sum_str` function, and use manually input bar in UI to type in `provone:hasSubProcess=ProcessID02` and regenerate YourWorkflow entity like this: +```r +# ________________________________________________________________________ +# YourWorkflow {WorkID provone:Workflow provone:hasSubProcess=ProcessID02}#### +``` +Notice that using ID as value here, but not title name. +With `sum_str(rdf="ttl", UserAL=TRUE, graph=TRUE, UserANM=TRUE)`, you can generate a new output file with `provone:hasSubProcess user:ProcessID02`, and a new RDF diagram as follows: + + + +If you want to define your domain, you need to add more arguments. For example, with `sum_str(rdf="ttl", UserAL=TRUE, graph=TRUE, UserANM=TRUE,prefix="test",baseURI = "http://testwebsite/",UserID ="testUser")`, you can generate a new output file as follows: +``` + @prefix test: . + @prefix provone: . 
+ + test:WorkID a provone:Workflow ; + rdfs:label "YourWorkflow" ; + provone:hasSubProcess test:ProcessID02 ; + provone:hasSubProcess test:ProcessID . + + test:ProcessID a provone:Process ; + rdfs:label "YourProcess" ; + provone:hasSubProcess test:ProcessID02 . + + test:ProcessID02 a provone:Process ; + rdfs:label "YourProcess02" ; + provone:wasDerivedFrom test:ProcessID . +``` +Notice that all prefixes are set to test, and your user-defined domain appears in the first line. + +Now we could use a part of a real example to generate a sample output file. Notice that all `%20` below means space which automatically generated by the function if you type spaces in your title or manually input bar. They will return as spaces in output file and RDF diagram. +You can try to use these code as your input breaks: +``` r +# ________________________________________________________________________ +# CPP_Workflow {workflow_cpp provone:Workflow skos:altLabel="CPP%20Workflow:%20Workflow%20top%20level"}#### + +## ........................................................................ +## Recipe%20for%20CPP%20Data {gates_recipe prov:Plan skos:altLabel="Recipe%20document:%20Documentation%20on%20Recipe%20for%20CPP%20Data" rdfs:seeAlso= rdfs:seeAlso= cwfo:hasCode="MATLAB:%20TensorExplorationPARAFAC.m,%20YenerTensor.m"}#### + +## ........................................................................ +## Data%20Gathering {data_gathering provone:Program skos:altLabel="Workflow%20Phase%200"}#### + +### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... +### Load%20Test%20Dataset {load_test_dataset provone:Program skos:altLabel="ANTHAyenerfinalmine.xlsx" rdfs:comment="Loading%20of%20initial%20matrix%20(output%20of%20YenerTensor.m)"}#### + +#### ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... .... 
+#### Test%20Dataset%20(TENSOR) {test_dataset_data provone:Data rdfs:comment="Test%20Dataset" cwfo:hasCode="MATLAB:%20YenerTensor.m"}#### + +## ........................................................................ +## Data%20Processing {data_processing provone:Program skos:altLabel="Workflow%20Phase%201"}#### + +### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... +### Adjust%20APGAR%20Scores {adjust_apgar_scores provone:Program skos:altLabel="Ensure%20APGAR%20scores%20are%20between%200-10" rdfs:comment="MATLAB%20code:%20YenerTensor.m,%20Section%201" dcterms:conformsTo= cwfo:hasInData=test_dataset_data}#### + +#### ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... .... +#### Adjusted%20APGAR%20Data {adjusted_apgar_data provone:Data rdfs:comment="Adjusted%20APGAR%20Data"}#### + +### .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. .. ... +### Choose%20Subjects {choose_subjects provone:Program skos:altLabel="Choose%20only%20subjects%20with%20all%20five%20time%20points" rdfs:comment="MATLAB%20code:%20YenerTensor.m,%20Section%201" dcterms:conformsTo= cwfo:hasInData=adjusted_apgar_data cwfo:hasOutData=growth_data}#### +``` +Then download `AssociationNames.txt` and `AssociationLibrary.txt` in the `demos` folder, and paste them into your current working directory. With `sum_str(rdf="ttl",graph=TRUE,UserAL=FALSE,UserANM=FALSE,prefix="cwf",baseURI = "http://cwf.tw.rpi.edu/",UserID ="data#" )`, you could generate an output file as follows: +``` + @prefix test: . + @prefix provone: . + @prefix skos: . + @prefix prov: . + @prefix rdfs: . + @prefix cwfo: . + @prefix dcterms: . + + test:workflow_cpp a provone:Workflow ; + skos:altLabel "CPP Workflow: Workflow top level" ; + rdfs:label "CPP_Workflow" ; + prov:hadPlan test:gates_recipe ; + provone:hasSubProgram test:data_gathering ; + provone:hasSubProgram test:data_processing . 
+ + test:gates_recipe a prov:Plan ; + skos:altLabel "Recipe document: Documentation on Recipe for CPP Data" ; + rdfs:seeAlso ; + rdfs:seeAlso ; + cwfo:hasCode "MATLAB: TensorExplorationPARAFAC.m, YenerTensor.m" ; + rdfs:label "Recipe for CPP Data" ; + + test:data_gathering a provone:Program ; + skos:altLabel "Workflow Phase 0" ; + rdfs:label "Data Gathering" ; + provone:hasSubProgram test:load_test_dataset . + + test:load_test_dataset a provone:Program ; + skos:altLabel "ANTHAyenerfinalmine.xlsx" ; + rdfs:comment "Loading of initial matrix (output of YenerTensor.m)" ; + rdfs:label "Load Test Dataset" ; + cwfo:hasOutData test:test_dataset_data . + + test:test_dataset_data a provone:Data ; + rdfs:comment "Test Dataset" ; + cwfo:hasCode "MATLAB: YenerTensor.m" ; + rdfs:label "Test Dataset (TENSOR)" ; + + test:data_processing a provone:Program ; + skos:altLabel "Workflow Phase 1" ; + rdfs:label "Data Processing" ; + provone:hasSubProgram test:adjust_apgar_scores ; + provone:hasSubProgram test:choose_subjects . 
+``` + +The code generates a graph representation of the resulting RDF such as this: + + + diff --git a/demos/AssociationLibrary.txt b/demos/AssociationLibrary.txt new file mode 100644 index 0000000..35a2465 --- /dev/null +++ b/demos/AssociationLibrary.txt @@ -0,0 +1,9 @@ +ParentClass,ChildClass,Ways,Property,ReverseProperty +"provone:Process","provone:Process",2,"provone:hasSubProcess","provone:wasDerivedFrom" +"provone:Process","provone:Data",2,"provone:wasDerivedFrom","provone:hasMember" +"provone:Process","provone:Visualization",2,"provone:wasDerivedFrom","provone:hasMember" +"provone:Workflow","prov:Plan",1,"prov:hadPlan"," " +"provone:Program","provone:Program",1,"provone:hasSubProgram","provone:wasDerivedFrom" +"provone:Workflow","provone:Program",1,"provone:hasSubProgram"," " +"provone:Program","provone:Data",1,"cwfo:hasOutData"," " +"provone:Workflow","provone:Process",1,"provone:hasSubProcess"," " diff --git a/demos/AssociationNames.txt b/demos/AssociationNames.txt new file mode 100644 index 0000000..b20d6c9 --- /dev/null +++ b/demos/AssociationNames.txt @@ -0,0 +1,23 @@ +AssociationName +provone:hasSubProcess +provone:sourcePToCL +provone:CLtoDestP +provone:hasInPort +provone:hasOutPort +provone:hasDefaultParam +provone:DLToInPort +provone:outPortToDL +provone:inPortToDL +provone:DLToOutPort +provone:wasAttributedTo +provone:wasDerivedFrom +provone:dataOnLink +provone:used +provone:wasGeneratedBy +provone:wasAssociatedWith +provone:wasInformedBy +provone:isPartOf +provone:hadMember +cwfo:hasOutData +cwfo:hasInData + diff --git a/demos/Creatgraph.gif b/demos/Creatgraph.gif new file mode 100644 index 0000000..60e4207 Binary files /dev/null and b/demos/Creatgraph.gif differ diff --git a/demos/Howtoadd.gif b/demos/Howtoadd.gif new file mode 100644 index 0000000..19e18d5 Binary files /dev/null and b/demos/Howtoadd.gif differ diff --git a/demos/Multiple Child entities.png b/demos/Multiple Child entities.png new file mode 100644 index 0000000..0f3d403 Binary 
files /dev/null and b/demos/Multiple Child entities.png differ diff --git a/demos/Multiple Parent entities.png b/demos/Multiple Parent entities.png new file mode 100644 index 0000000..99b7915 Binary files /dev/null and b/demos/Multiple Parent entities.png differ diff --git a/demos/RDF_output_file_2017_10_15_15_59_28.ttl b/demos/RDF_output_file_2017_10_15_15_59_28.ttl new file mode 100644 index 0000000..5f0bd4f --- /dev/null +++ b/demos/RDF_output_file_2017_10_15_15_59_28.ttl @@ -0,0 +1,54 @@ + @prefix rdfs: . + @prefix cwf: . + @prefix provone: . + + cwf:IrisWorkflow a provone:Workflow ; + provone:hasSubProcess cwf:IrisProcess ; + rdfs:label " Iris Workflow " . + + cwf:IrisProcess a provone:Process ; + hasInPort cwf:IrisDataIn ; + str:has cwf:VirginicaFilter + rdfs:label " Iris Process " . ; + provone:hasSubProcess cwf:VirginicaFilter ; + provone:hasSubProcess cwf:VirginicaFilter ; + provone:hasSubProcess cwf:VirginicaFilter2 ; + rdfs:label " Iris Process " . + + cwf:IrisDataIn a provone:InputPort ; + hasDefaultParam cwf:IrisData ; + rdfs:label " Iris Data in " . + + cwf:IrisData a provone:Data ; + rdfs:label " Iris Data " . + + cwf:VirginicaFilter a provone:Process ; + str:belongTo cwf:IrisProcess ; + rdfs:label " Test Filter " . + + cwf:VirginicaFilter a provone:Process ; + provone:wasDerivedFrom cwf:IrisProcess ; + rdfs:label " Test Filter2 " . + + cwf:VirginicaFilter a provone:Process ; + provone:wasDerivedFrom cwf:IrisProcess ; + provone:wasDerivedFrom cwf:VirginicaData ; + rdfs:label " Virginica Filter " . + + cwf:VirginicaData a provone:Data ; + provone:wasDerivedFrom cwf:VirginicaFilter ; + rdfs:label " Virginica data " . + + cwf:VirginicaFilter2 a provone:Process ; + provone:wasDerivedFrom cwf:IrisProcess ; + provone:wasDerivedFrom cwf:VirginicaData2 ; + rdfs:label " Virginica Filter 2 " . + + cwf:VirginicaData2 a provone:Data ; + provone:wasDerivedFrom cwf:VirginicaFilter2 ; + provone:beUsedFor cwf:IdProcess ; + rdfs:label " Virginica data 2 " . 
+ + cwf:IdProcess a provone:Process ; + rdfs:label " Identical Process " . + diff --git a/demos/example RDF diagram.png b/demos/example RDF diagram.png new file mode 100644 index 0000000..d723f15 Binary files /dev/null and b/demos/example RDF diagram.png differ diff --git a/demos/final example.png b/demos/final example.png new file mode 100644 index 0000000..e7b7987 Binary files /dev/null and b/demos/final example.png differ diff --git a/demos/new RDF diagram.png b/demos/new RDF diagram.png new file mode 100644 index 0000000..e36b01f Binary files /dev/null and b/demos/new RDF diagram.png differ