From c17d4f208febf1cd608c666c1decd44fc07729cd Mon Sep 17 00:00:00 2001 From: Rianne Jansen Date: Mon, 11 Apr 2016 18:47:33 +0200 Subject: [PATCH 01/26] New function for creating patient sets in transmart and retrieving the ID of the set through the REST api, based on a series of constraints --- DESCRIPTION | 2 +- R/RClientConnectionManager.R | 14 +- R/getPatientSetID.R | 688 ++++++++++++++++++++++++++++++++ bin/installCommands.R | 4 +- man/getHighdimData.Rd | 2 +- man/getObservations.Rd | 2 +- man/getPatientSetID.Rd | 112 ++++++ man/transmartRClient-package.Rd | 2 + 8 files changed, 817 insertions(+), 9 deletions(-) create mode 100644 R/getPatientSetID.R create mode 100644 man/getPatientSetID.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 213969f..1b6f927 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Type: Package Title: R Client for accessing the tranSMART RESTful API Version: 0.3 Date: 2015-11-03 -Depends: RCurl, rjson, plyr, RProtoBuf, hash, reshape +Depends: RCurl, rjson, plyr, RProtoBuf, hash, reshape, XML Author: Tim Dorscheidt, Jan Kanis, Rianne Jansen Maintainer: Description: This package exposes tranSMART's RESTful API as a set of R functions. It uses tranSMART's OAuth authentication to access the data for which the user is authorized, and allows exploring and downloading the data. diff --git a/R/RClientConnectionManager.R b/R/RClientConnectionManager.R index 4ab6df2..69c90a5 100644 --- a/R/RClientConnectionManager.R +++ b/R/RClientConnectionManager.R @@ -263,15 +263,21 @@ function (oauthDomain = transmartClientEnv$transmartDomain, prefetched.request.t } .serverMessageExchange <- -function(apiCall, httpHeaderFields, accept.type = "default", progress = .make.progresscallback.download()) { +function(apiCall, httpHeaderFields, accept.type = "default", progress = .make.progresscallback.download(), + post.content.type = "", requestBody = "") { if (any(accept.type == c("default", "hal"))) { if (accept.type == "hal") { httpHeaderFields <- c(httpHeaderFields, Accept = "application/hal+json;charset=UTF-8") } + curlOptions <- list() + if (post.content.type != ""){ + httpHeaderFields <- c(httpHeaderFields, 'content-type' = post.content.type) + if(requestBody == ""){ stop("Missing body for POST request")} + curlOptions[["postfields"]] <- requestBody + } headers <- basicHeaderGatherer() result <- list(JSON = FALSE) + curlOptions <- c(curlOptions, list(httpheader = httpHeaderFields, headerfunction = headers$update)) result$content <- getURL(paste(sep="", transmartClientEnv$db_access_url, apiCall), - verbose = getOption("verbose"), - httpheader = httpHeaderFields, - headerfunction = headers$update) + verbose = getOption("verbose"), .opts = curlOptions) if (getOption("verbose")) { message("Server response:\n", result$content, "\n") } if(is.null(result)) { return(NULL) } result$headers <- headers$value() diff --git a/R/getPatientSetID.R b/R/getPatientSetID.R new file mode 100644 index 0000000..97ff5d4 --- /dev/null +++ b/R/getPatientSetID.R @@ -0,0 +1,688 @@ +# Copyright 2014, 2015, 2016 The Hyve B.V. +# Copyright 2014 Janssen Research & Development, LLC. +# +# This file is part of tranSMART R Client: R package allowing access to +# tranSMART's data via its RESTful API. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version, along with the following terms: +# +# 1. You may convey a work based on this program in accordance with +# section 5, provided that you retain the above notices. +# 2. You may convey verbatim copies of this program code as you receive +# it, in any medium, provided that you retain the above notices. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . + + +# Retrieve patient.set ID from tranSMART database, based on the constraints given by the user. +# Patient.set constraints are provided as an expression in the shape of, for example, +# (c1 | c2) & (c3|c4|c5) & c6 &... where c is either a constraint built up as {concept}{operator}{constraint_value} +# (e.g. "age" < 60) or a reference to a concept (e.g. "age") +getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = F){ + if(missing(study.name)){stop("Provide study name")} + if(missing(patientset.constraints)){stop("Provide patientset.constraints")} + + # retrieve the expression that defines the constraints + if(!is.call(patientset.constraints)){patientset.constraints <- substitute(patientset.constraints)} + patientset.constraints <- .checkPatientSetConstraints(patientset.constraints) + + message("Processing input...", "") + xmlQuery <- .buildXMLquery(patientset.constraints, study.name) + + # do POST request, and store result + message("\nCreating patient set...", "") + serverResult <- .transmartGetJSON("/patient_sets", requestBody = xmlQuery, + post.content.type ="text/xml;charset=UTF-8", onlyContent = c(201)) + + #return patient.set ID + patientsetID <- serverResult$id + + hrConstraints <- .makeHumanReadableQuery(xmlQuery) + + result <- list(patientsetID = patientsetID, patientsetSize = serverResult$setSize, + input_patientset.constraints = .expressionToText(patientset.constraints), + finalQueryConstraints = hrConstraints) + + message(paste("\nBased on the input, the following constraints were defined and sent to the server:\n", + result$finalQueryConstraints, sep = ""), "") + if(returnXMLquery){result[["xmlQuery"]] <- xmlQuery} + return(result) +} + + +.checkPatientSetConstraints <- function(patientsetConstraints){ + #test if it is expression and not a string. If string: try to parse + if(is.character(patientsetConstraints)){ + if(length(patientsetConstraints) > 1){ + stop("Incorrect input for patient set constraints. Found multiple strings for defining the patient set constraints. + The patient set constraints should be supplied in one single expression (or string).")} + + try({patientsetConstraintsParsed <- parse(text = patientsetConstraints)[[1]] + + if(length(patientsetConstraintsParsed) > 1){ + message(paste("Detecting a string as input for patient set constraints - expected is an expression,", + "such as: \"age\" > 65. \nWill attempt to parse the constraints out of the string, converting it", + "into an expression...")) + patientsetConstraints <- patientsetConstraintsParsed + } + }, silent = T + ) + } + return(patientsetConstraints) +} + + +# parse the constraints, and turn it into a query in XML format +.buildXMLquery <- function(patientset.constraints, study.name){ + # retrieve concept information for the given study, and only keep relevant columns. + # this will be used later to match the concepts supplied by the user as part of the constraint definition to concept + # paths. + studyConcepts <- getConcepts(study.name) + studyConcepts <- studyConcepts[, c("name", "fullName", "type", "api.link.self.href")] + studyConcepts <- .findEndLeaves(studyConcepts) + + ## parse the expression containing the constraints and translate this into a query definition in XML format + parsedConstraintsXMLlist <- .parsePatientSetConstraints(patientset.constraints, studyConcepts) + + # parsePatientSetConstraints returns a list with XML trees, these trees all either have items as top XMLnodes or + # panels. If the top nodes of the trees are items, add these items to a panel node and add this new node to a list. + if(xmlName(parsedConstraintsXMLlist[[1]]) == "item"){ + parsedConstraintsXMLlist <- .makePanelList(parsedConstraintsXMLlist) + } + + #add one panel with study.name, ensuring that only patients from the specified study are selected + parsedConstraintsXMLlist <- .addStudyPanel(parsedConstraintsXMLlist, study.name, studyConcepts[1, "fullName"]) + + # build XML formatted query + xmlQuery <- xmlNode("qd:query_definition", namespaceDefinitions = c(qd="http://www.i2b2.org/xsd/cell/crc/psm/1.1/")) + for(i in 1:length(parsedConstraintsXMLlist)){ + xmlQuery <- append.XMLNode(xmlQuery, parsedConstraintsXMLlist[[i]]) + } + + xmlQuery_asString <- saveXML(xmlQuery, prefix = '\n') + + if (getOption("verbose")) { message(xmlQuery_asString) } + + return(xmlQuery_asString) +} + + +# determine for each concept in the concept table whether a concept is an end leaf of the tree, ie. if it is a data node +# (which can be either a numeric, categorical or highdim node) +.findEndLeaves <- function(conceptListStudy){ + conceptTypes <- unique(conceptListStudy$type) + + if( any(! conceptTypes %in% c("CATEGORICAL_OPTION", "NUMERIC", "UNKNOWN", "HIGH_DIMENSIONAL"))){ + warning("Unexpected concept type for one or more concepts in the selected study. + Determination which concepts are end-leaves of the tree might not work correcty in all cases. + This only affects the patient selection query if concepts with undetermined type are included in the query. + In that case this message is followed by an accompanying error. + You can help fix it by contacting us. Type ?transmartRClient for contact details. + \n") + } + + # concepts with type numeric and high_dimensional are end-leaves, + # concepts with type categorical_options are not end-leaves + endLeaf <- "" + conceptListStudy <- cbind(conceptListStudy, endLeaf, stringsAsFactors = F) + conceptListStudy$endLeaf[conceptListStudy$type %in% c("NUMERIC", "HIGH_DIMENSIONAL")] <- "YES" + conceptListStudy$endLeaf[conceptListStudy$type == "CATEGORICAL_OPTION"] <- "NO" + + #find categorical data nodes, and set type of categorical end-leave (data node) to "CATEGORICAL_NODE" + # concepts with 'type' categorical_option are the concept values. Take the concept path of the concept values and + # remove the last part to retrieve a list of concept paths for categorical nodes. + categoricalOptionsPaths <- conceptListStudy$fullName[conceptListStudy$type == "CATEGORICAL_OPTION"] + categoricalNodes <- sub("\\\\[^\\]*\\\\$", "\\\\",categoricalOptionsPaths) # remove last part of concept path + # containing the categorical value, to obtain path to categorical node + categoricalNodes <- unique(categoricalNodes) + + conceptListStudy$endLeaf[conceptListStudy$type == "UNKNOWN" & conceptListStudy$fullName %in% categoricalNodes] <- "YES" + conceptListStudy$type[conceptListStudy$type == "UNKNOWN" & conceptListStudy$fullName %in% categoricalNodes] <- "CATEGORICAL_NODE" + conceptListStudy$endLeaf[conceptListStudy$type == "UNKNOWN" & !conceptListStudy$fullName %in% categoricalNodes] <- "NO" + + return(conceptListStudy) +} + +# parsePatientSetConstraints takes an expression defining the constraints for the patientset and returns +# either a list of item XMLtrees or list of panel XMLtrees +.parsePatientSetConstraints <- function(patientsetConstraints, studyConcepts){ + relationalOperators <- c("<", ">", "<=",">=", "==", "!=") + logicalOperators <- c("&","&&", "|", "||") + allowedOperators <- c(relationalOperators, logicalOperators) + + verbose <- getOption("verbose") + # construct a message that's used later on, when an error occurs. This message includes a listing of the different + # elements (sub units) of the constraint expression, if verbose == T + elementsMsg <- "" + if(verbose){ + subUnits <- "" + for(i in 1:length(patientsetConstraints)){ + subUnits <- paste(subUnits, paste("\n\tElement ", i,": ", .expressionToText(patientsetConstraints[[i]]), sep = "")) + } + elementsMsg <- paste("\nElements of the (sub)constraint after parsing", subUnits,sep = "") + } + + errorMsg <- paste("Incorrect (sub)constraint definition, or failure to parse the (sub)constraint definition correctly.", + "Check the format of the constraint. \nFor more details about how to specify patient set constraints,", + "see the help/manual page of this function. \n(Sub)constraint: ", + .expressionToText(patientsetConstraints), elementsMsg) + + # if length(patientsetConstraints) == 3, then the expression contains three elements, so it is either a low-level + # constraint of the form {concept}{constraint_operator}{constraint_value} or it is a concatenation of constraints + # separated by either an AND or OR operator (of form {some constraint(s)}{ &, &&, | or || }{some constraint(s)} ) + if(length(patientsetConstraints) == 3){ + constraintOperator <- as.character(patientsetConstraints[[1]]) + + if(!constraintOperator%in% allowedOperators){stop(errorMsg)} + constraint <- list() + + # in case where the (sub)constraint is a concatation of subconstraints, combined by an AND or OR operator + # (e.g. "age" > 12 & "sex" = "Female"): element [[1]] contains the AND or OR operator, element [[2]] the + # subconstraint to the left of the operator, element [[3]] is the subconstraint to the right of the operator + # in case that the (sub)constraint is not a concatenation of subconstraints, but holds a single criterium that a + # concept has to satisfy to: + # [[1]] contains a relational operator, [[2]] the concept, [[3]] the constraint value + is.singleConstraint <- constraintOperator %in% relationalOperators + if(is.singleConstraint){ + itemXMLlist <- list(.parseSingleConstraint(patientsetConstraints, studyConcepts)) + return(itemXMLlist) + }else{ + # it's a concatenation of constraints: call function again on the subconstraints. + # right now it only supports the format where the & operators are always the highest level operators and the | + # operators are only used as lowest level, forcing the format: (c1|c2)&c3&(c4|c5|c6|...)& ... + + treeBeforeOperator <- .parsePatientSetConstraints(patientsetConstraints[[2]], studyConcepts) + treeAfterOperator <- .parsePatientSetConstraints(patientsetConstraints[[3]], studyConcepts) + + #if there is an "OR" operation inbetween two subconstraints, the whole constraint cannot have an & anymore + # (this is for forcing the strict format for constraint definition described above) + if(constraintOperator == "|" ) { + if(grepl("&", .expressionToText(patientsetConstraints))){ + stop(paste("Wrong format of (sub)constraint definition. Found in (sub)constraint: ", + .expressionToText(patientsetConstraints), + "\nRight now the only format supported for defining patientset constraints is one where the & ", + "operator is on the highest level of the constraint definition \nand the | (or) operator on the ", + "lowest (second) level, \nie. the format is 'x1' or (in case of multiple \'&\' operations) ", + "'x1 & x2 & ...', \nwhere x1, x2, etc. can contain one or more subconstraints (called c here) ", + "separated by an | (or) operator, ie. x = c1 or x = c1 | c2 | ...,", + "\n where c is a single constraint such as \'\"age\" < 60\' or a reference to a concept.", + "\n Examples of valid constraints: c1, c1|c2, c1&c2&c3, (c1|c2)&c3&(c4|c5|c6)", sep = "" + )) + } + itemXMLlist <- c(treeBeforeOperator,treeAfterOperator) + return(itemXMLlist) + } + + # treeBeforeOperator/treeAfterOperator can be either a list of items or a list of panels + # if it contains a list of items: add the items of that list to a panel node + if(constraintOperator == "&"){ + if(xmlName(treeBeforeOperator[[1]]) == "item"){ + beforePanels <- .makePanelList(treeBeforeOperator) + } + if(xmlName(treeBeforeOperator[[1]]) == "panel"){ + beforePanels <- treeBeforeOperator + } + if(xmlName(treeAfterOperator[[1]]) == "item"){ + afterPanels <- .makePanelList(treeAfterOperator) + } + if(xmlName(treeAfterOperator[[1]]) == "panel"){ + afterPanels <- treeAfterOperator + } + + panelList <- c(beforePanels, afterPanels) + return(panelList) + } + } + }else if(class(patientsetConstraints) == "("){ + # expression is surrounded by brackets: take expression between brackets and call function again + # element [[2]] contains the expression between the brackets, element [[1]] is '(' + xmlTreeList <- .parsePatientSetConstraints(patientsetConstraints[[2]], studyConcepts) + return(xmlTreeList) + }else if(length(patientsetConstraints) == 1) { + # Then the (sub)constraint should consist of only a specification of a concept. + # This will result in selection of all patients that have a value for this concept. + # Concept specification can be a string containing a pattern to match to the concept name or a concept path or link, + # or an object (variable) that contains such a string (only single string). + + #retrieve concept path + conceptPath <- .getConstraintConcept(patientsetConstraints, patientsetConstraints, studyConcepts, + identical.match = F, testIfEndLeave = F)[["conceptPath"]] + + # make itemTree for that concept + itemXMLlist <- list(xmlNode("item", xmlNode("item_key", .makeItemKey(conceptPath)))) + return(itemXMLlist) + }else{ + stop(errorMsg) + } +} + +# the deparse function converts expressions to strings. However it cuts the strings of at a certain bytelength, +# so a long expression could result in a character vector with several portions of the original expression +# this function makes one string out of the vector again +.expressionToText <- function(expression){ + textExpression <- deparse(expression, width.cutoff = 500) + + if(length(textExpression)>1){ + textExpressionPasted <- gsub("^[[:blank:]]+", "", textExpression) + textExpressionPasted <- paste(textExpressionPasted, collapse = "") + + #warnings are truncated, so it doesn't necessarily print all in case of long textExpression + message(paste("While trying to convert an expression to text, the deparse function cut an expression in two.", + "\nSeparate parts:\n", paste("\t", textExpression, collapse = "\n AND \n"), + "\nThese are pasted again together. Result:\n ", textExpressionPasted)) + textExpression <- textExpressionPasted + } + return(textExpression) +} + + +.makeHumanReadableQuery <- function(xmlQuery){ + parsedXML <- "" + xmlQuery <- strsplit(xmlQuery, "\n")[[1]] + panelCount <- 0 + itemCount <- 0 + invert <- "0" + for(i in 1:length(xmlQuery)){ + oneLine <- xmlQuery[[i]] + + if(grepl("",oneLine)){ + if(panelCount > 0 ){parsedXML <- paste(parsedXML, " & \n(", sep = "") + }else{parsedXML <- paste(parsedXML, "(", sep = "")} + panelCount <- panelCount + 1 + itemCount <- 0 + } + if(grepl("", oneLine)){ + invert <- .removeXMLmarkUp(oneLine, "invert") + invert <- gsub("[[:blank:]]", "",invert) + if(invert == "1"){ parsedXML <- paste(parsedXML, "!(", sep = "")} + } + if(grepl("",oneLine)){ + if(invert == "1"){ parsedXML <- paste(parsedXML, "))", sep = "") + }else{parsedXML <- paste(parsedXML, ")", sep = "")} + } + if(grepl("",oneLine)){ + if(itemCount > 0 ){parsedXML <- paste(parsedXML, " | ", sep = "")} + itemCount <- itemCount + 1 + } + if(grepl("",oneLine)){ + #get concept path + item_key <- .removeXMLmarkUp(oneLine, "item_key") + concept_path <- gsub("\\\\\\\\Public Studies", "", item_key) + concept_path <- gsub("\\\\\\\\Private Studies", "", concept_path) + parsedXML <- paste(parsedXML, "\"", concept_path, "\"", sep = "") + } + if(grepl("",oneLine)){ + valueOperator <- .removeXMLmarkUp(oneLine, "value_operator") + parsedXML <- paste(parsedXML, " ", valueOperator, " ", sep = "") + } + if(grepl("",oneLine)){ + valueConstraint <- .removeXMLmarkUp(oneLine, "value_constraint") + parsedXML <- paste(parsedXML, " ", valueConstraint, " ", sep = "") + } + } + return(parsedXML) +} + + +.removeXMLmarkUp <- function(string, markUpString){ + string <- gsub(paste("^[[:blank:]]*<", markUpString,">", sep = ""), "", string) + string <- gsub(paste("", sep = ""), "", string) + return(string) +} + + +#just needs one conceptPath, can be of any of the concepts in the study. It can be any path in column 'fullName' +.addStudyPanel <- function (constraintXMLlist, study.name, conceptPath){ + # retrieve the path for the study concept, by taking only the first part of the supplied concept path up to and + # including the study.name. + # e.g. take "\\Public Studies\\GSE8581\\" from "\\Public Studies\\GSE8581\\Subjects\\Ethnicity\\Afro American\\" + splitPath <- strsplit(conceptPath, "\\\\")[[1]] + nameHit <- grep(study.name, splitPath, ignore.case = T)[1] # take the first, just in case the study.name is repeated + # in later part of path + studyPath <- paste(c(splitPath[1:nameHit], ""), collapse = "\\", sep = "") + itemKey <- .makeItemKey(studyPath) + + panel <- xmlNode("panel", + xmlNode("invert", 0), + xmlNode("item", + xmlNode("item_key", itemKey))) + constraintXMLlist <- c(constraintXMLlist, list(panel)) + return(constraintXMLlist) +} + + +# it expects a list of "item" XML trees. It will add all items to a panel XML node, +# and returns that node as part of a list +.makePanelList <- function(itemXMLtreeList){ + panel <- xmlNode("panel", xmlNode("invert", 0)) + for(i in 1:length(itemXMLtreeList)){ + panel<- append.XMLNode(panel, itemXMLtreeList[[i]]) + } + panel <- list(panel) + return(panel) +} + +# constraint is of format: {concept definition}{relational operator}{constraint_value}, e.g. "age" < 12. +.parseSingleConstraint <- function(patientsetConstraints, studyConcepts){ + constraint <- list() + + # grab the different elements of the constraint definition + constraint$operator <- as.character(patientsetConstraints[[1]]) + constraint$concept <- patientsetConstraints[[2]] + constraint$value <- patientsetConstraints[[3]] + + if(class(constraint$value) == "name"){ + tmpValue <- try(eval(constraint$value, envir = globalenv()), silent = T) + if(class(tmpValue) == "try-error"){ + try_error <- attr(tmpValue, "condition")$message + err_message <- paste(try_error, ". Object was specified in (sub)constraint ", + .expressionToText(patientsetConstraints) , ".\n", sep = "") + stop(err_message) + } + if(length(tmpValue) >1){ + tryCatch(stop(paste("Incorrect input for constraint_value in (sub)constraint: ", + .expressionToText(patientsetConstraints), ".\nObject length of \'", constraint$value , + "\' is larger than 1.", + "Only a single input value (string/number) is allowed as a constraint_value.", + "\nInput for constraint_value: ", sep = "") , call. = F), finally = print(tmpValue)) + } + constraint$value <- tmpValue + } + + # a concept can be defined by a pattern matching the concept name (1), by concept.link(2), concept.path(3) or + # by giving a variable/object containing a string with one of those three + # find the concept path that corresponds to the concept, and determine the type of + # node (numerical, categorical or high dim) + constraint <- c(constraint, .getConstraintConcept(constraint$concept, patientsetConstraints, studyConcepts, + identical.match = F)) + constraint$value_operator <- NA + constraint$value_type <- NA + + if(constraint$conceptType == "NUMERIC"){ + #check if the supplied constraint value is numeric + if(!is.numeric(constraint$value)){ + stop(paste("The supplied constraint value ", deparse(constraint$value)," is not numerical, while concept ", + constraint$conceptPath, " is a numerical concept. (This was the concept selected based on the input: \'", + constraint$concept, "\'). \nEncountered in (sub)constraint: ",.expressionToText(patientsetConstraints), + sep = "" )) + } + + # Each individual constraint is represented as an "item" in the XML tree that holds the query definition for the + # patient.set + # construct the "item" subtree for the current constraint + constraint$item_key <- .makeItemKey(constraint$conceptPath) + constraint$value_type <- "NUMBER" + #translate relational operator from R to a value operator that can be recognized in the query + constraint$value_operator <- .getValueOperator(constraint$operator, "NUMERIC") + constrain_by_value_tree <- xmlNode("constrain_by_value", + xmlNode("value_operator", constraint$value_operator), + xmlNode("value_constraint", constraint$value), + xmlNode("value_type", constraint$value_type)) + itemXMLtree <- xmlNode("item", + xmlNode("item_key", constraint$item_key), + constrain_by_value_tree) + } + + if(constraint$conceptType == "CATEGORICAL_NODE" ){ + + #check if supplied constraint value is character + if(!is.character(constraint$value)){ + warning(paste("The supplied constraint value ", constraint$value," is not of class \'character\', while concept ", + constraint$conceptPath, " is a categorical concept (ie. containing text).", + "\n(This was the concept selected based on the input: \'", + constraint$concept, "\')", "\nWill convert the value to character, but unless there is actually a ", + "categorical value that matches the constraint value, this will result in an error later on.", + "\nEncountered in (sub)constraint: ",.expressionToText(patientsetConstraints), + sep = "" )) + constraint$value <- as.character(constraint$value) + } + + #check if the given constraint value exists for the specified categorical concept + constraintValuePath <- .getConstraintConcept(constraint$value, patientsetConstraints, studyConcepts, + identical.match = T, testIfEndLeave = F)[["conceptPath"]] + if(constraintValuePath != paste(constraint$conceptPath, constraint$value, "\\", sep = "")){ + stop(paste("Incorrect (sub)constraint definition for (sub)constraint:\'", .expressionToText(patientsetConstraints), + "\'.", "\nThe constraint value \'", constraint$value,"\' does not seem to be an existing value ", + "of the categorical concept \'", constraint$concept, "\'.", + "\nConcept path: ", constraint$conceptPath,"\nPath to contstraint value: ", + constraintValuePath, sep= "")) + } + + #translate relational operator from R to a value operator that can be used in the query + #only EQ and NE are possible for text variables. Only EQ is supported right now + constraint$value_operator <- .getValueOperator(constraint$operator, "CATEGORICAL_NODE") + + # construct the "item" subtree for the current constraint + if(constraint$value_operator == "EQ"){ + itemXMLtree <- xmlNode("item", xmlNode("item_key", .makeItemKey(constraintValuePath))) + } + if(constraint$value_operator == "NE"){ + stop("For now the '!=' operation is not supported for categorical values") + ##implement later? So that if you specify conceptX != A then it automatically selects all possible categorical + # values in conceptX, except A. (you can't just use invert=1, for example trial_group != control | x < 1) + # or trial_group != control | lung_abnormal == "YES" should work too) + } + } + + if(constraint$conceptType == "HIGH_DIMENSIONAL"){ + # you cannot apply relational operations to the high dimensional node + stop(paste("Incorrect use of a high dimensional data node in (sub)constraint: ", + .expressionToText(patientsetConstraints),".", + "\nYou can only use high dimensional nodes for defining patient sets by supplying the node name ", + "alone (e.g. \"mRNA day1\"); you cannot apply a relational operation (such as \"mRNA day1 < 0\")", + "to the node. \nIf you supply the high dimensional node name, ", + "all patients that have data for that high dimensional node will be selected.", sep = "")) + } + + if(is.na(constraint$value_operator)){ + stop(paste("Could not determine which value_operator to use in the query definition for the constraint \'", + .expressionToText(patientsetConstraints), "\'. Operator supplied by user: ", constraint$operator, + sep = "" )) + } + + return(itemXMLtree) +} + + +#construct item_key from concept path +# expected format item key:\\Dimension\concept_path. Examples: +# \\Public Studies\Public Studies\Cell-line\Demographics\Age\ +# \\Private Studies\Private Studies\Cell-line\Characteristics\Age\ +.makeItemKey <- function(conceptPath){ + dimension <- strsplit(conceptPath, "\\\\")[[1]][2] #get first part of the concept path, that is either public or private study + + if(!dimension %in% c("Public Studies", "Private Studies")){ + stop("Could not determine the dimension for the item_key, that is used for the XML query")} + item_key <- paste("\\\\", dimension, conceptPath, sep = "") + return(item_key) +} + + +#translate relational operators to a text representation as is expected for the query +.getValueOperator <- function(operator, type){ + if(type == "NUMERIC"){ + if(operator == "<"){return("LT")} + if(operator == "<="){return("LE")} + if(operator == ">"){return("GT")} + if(operator == ">="){return("GE")} + if(operator == "=="){return("EQ")} + if(operator == "!="){return("NE")} + } + + if(type == "CATEGORICAL_NODE"){ + if(operator %in% c("<", "<=", ">", ">=")){ + stop(paste("The operation \'", operator, "\' is not supported for text variables.", sep = ""))} + if(operator == "=="){return("EQ")} + if(operator == "!="){return("NE")} + } + + #if the function did not return yet, something went wrong. + stop(paste("Something went wrong with determining the value_operator to use for the query definition. Operator:", + operator,". Value type: ", type, sep = "")) +} + + +# find the concept path for a given concept definition. Concept can be specified as pattern matching a +# concept name, or as a partial/full concept path or link +.getConstraintConcept <- function(concept, subconstraint, studyConcepts, identical.match = F, testIfEndLeave = T){ + info <- "Correct way to supply a concept (as part of a (sub)constraint) is: + either directly as a string, containing the concept name or path, + or indirectly as an object (variable) that contains a string with the concept name or path. + Supplying a concept link as found in the column \'api.link.observations.href\' of the data.frame retrieved by + getConcepts() should also work. + Example: if you want to select patients younger than 12, supply \"age\" directly as as string: \"age\" < 12 + or indirectly: concepts[2] < 12, where concepts[2] contains the string \"age\"." + + subconstraint <- .expressionToText(subconstraint) + + #if not string: get the value of the variable/object. Value should be one string. + if(class(concept) == "name"){ + result <- try(eval(concept, envir = globalenv()), silent = T) + if(class(result) == "try-error"){ + try_error <- attr(result, "condition")$message + err_message <- paste(try_error, ". Object was specified in subconstraint ", subconstraint, ".\n", info, sep = "") + stop(err_message) + } + if(length(result) >1){ + tryCatch(stop(paste("Incorrect input for concept specification in subconstraint: ", subconstraint, + ".\nObject length of \'", concept , + "\' is larger than 1. Only a single string is allowed for specifying the concept.", + "\nInput for concept: ", sep = "") , call. = F), finally = print(result)) + } + concept <- result + } + #concept should be a string. + if(!is.character(concept)){ + stop(paste("Incorrect input for concept specification in subconstraint: ", subconstraint, ".\n", info, sep = "")) + } + + orig_concept <- concept + + if(identical.match) { + concept <- paste("^", concept, "$", sep = "") + concept <- gsub("^^", "^", concept, fixed = T) + concept <- gsub("$$", "$", concept, fixed = T) + } + + is.concept.path <- grepl("\\\\", concept) + conceptMatch <- character(0) + if(!is.concept.path){ + #concept paths are in 'fullName' column of getConcepts result + conceptMatch <- grep(concept, studyConcepts$name, ignore.case = !identical.match) + + if(length(conceptMatch) > 1){ + conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, concept_list = studyConcepts$name) + } + } + + if(length(conceptMatch) == 0){ + # supplied concept migth be concept path or link. + is.concept.link <- grepl("^/studies/.+/concepts/", concept) | grepl("^studies/.+/concepts/", concept) + + if(is.concept.path & is.concept.link){stop( + paste( "Something went wrong with detecting whether the provided string \'", concept, + "\' is a concept path or concept link. Please check if the provided string is correct.", + "\nTo check this, you can look at the resulting data.frame of getConcepts(YOUR_STUDY_NAME).", + "\nThe concept paths that can be used for this study can be found in the \'fullName\' column,", + "and the concept links in the \'api.link.self.href\' column", + "If the string does have the correct format, you may have encountered a bug.", + "\nYou can help fix it by contacting us. Type ?transmartRClient for contact details.", sep = "")) + } + + if(is.concept.path){ + conceptMatch <- grep(concept, studyConcepts$fullName, fixed = T) + if(length(conceptMatch) > 1){ + conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, + concept_list = studyConcepts$fullName) + } + } + + if(is.concept.link){ + message("Detecting a concept.link. Will attempt to find matching concept path.") + conceptMatch <- grep(concept, studyConcepts$api.link.self.href) + if(length(conceptMatch) > 1){ + conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, + concept_list = studyConcepts$api.link.self.href) + } + } + } + + identicalM <- "" + if(identical.match){identicalM <- "identical(literal) "} + if(length(conceptMatch) == 0){ + stop(paste("No ", identicalM, "match found for concept or categorical value \'", orig_concept, + "\', found in subconstraint: ", subconstraint, + "\nNote: The supplied concept in the constraint definition can be a full or partial ", + "match to the concept name (and can even contain regular expressions: pattern matching will be done as", + " done for the grep function, ignoring case), or it can be a concept.link or a concept.path.", + "\nIn case of a categorical concept; the value part of the constraint has to be a literal match to one", + " of the possible categorical values for that concept." , sep = "")) + } + + #test if matches are endLeaves, ie. a data node. + # If constraints are supplied in the form of {concept}{operator}{constraint_value}, the concept should be an end leave + # (ie. data node), either categorical or numerical, and if it's categorical it should be an end leave and not a + # categorical value. If only a concept is supplied as a constraint, it is possible to also use other concepts that + # are not end leaves - in that case testIfEndLeave should be FALSE. + is.endLeaf <- studyConcepts$endLeaf[conceptMatch] == "YES" + + if(!is.endLeaf & testIfEndLeave){ + stop(paste("The supplied concept \'", concept, "\' is not a data node (ie. not an end leaf of the transmart tree).", + "The supplied concept name/path/link must point to a single numerical, categorical or high dimensional", + " data node (end leaf).", sep = "")) + } + + matched_concept = list(conceptPath = studyConcepts$fullName[conceptMatch], + conceptType = studyConcepts$type[conceptMatch]) + message(paste("Matched the concept \'", orig_concept, "\' in subconstraint \'", subconstraint, + "\'\n to concept (full path): \'", matched_concept$conceptPath, "\'\n", sep = "") ) + return(matched_concept) +} + + +#called by .getConstraintConcept if there were initially multiple matches found for the concept, using the 'grep' function +.selectMatch <- function(concept, matching_indices, concept_list){ + #any literal, full length matches? (ignoring case) + literalMatches <- tolower(concept_list[matching_indices]) == tolower(concept) + if(any(literalMatches)){ + matching_indices <- matching_indices[literalMatches] + if(length(matching_indices) > 1){ + stop(paste("There seem to be more than one concepts with the name \'", concept, "\'.", + "\nPlease use the concept path instead of the concept name to specify the concept.", + "(Hint: Concept paths can be found in the \'fullName\' column of the getConcepts() result).", sep = "")) + } + message(paste("Multiple matching concepts found for the string \'", concept,"\'. One identical match was found: \'", + concept_list[matching_indices], "\'.\nThis match is selected.", + "\nFor more precise matching use full-length concept names, paths, or links,", + " and/or include beginning/end of string symbols (^/$) - see ?regexp", sep = "")) + } + + #if not literal match take the shortest match + if(!any(literalMatches)){ + paths_tmp<- concept_list[matching_indices] + shortest_match<- matching_indices[which.min(nchar(paths_tmp))] + matching_indices<- shortest_match + message(paste("Multiple matching concepts found for the string \'", concept,"\', selecting shortest match: \'", + paste(concept_list[shortest_match], collapse = ","), "\'.", + "\nFor more precise matching use full-length names or paths,", + " and/or include beginning/end of string symbols (^/$) - see ?regexp", sep = "")) + if(length(matching_indices) > 1){ + stop(paste("There are multiple shortest matches for \'", concept, "\'. Matches: ", + paste(concept_list[shortest_match], collapse = ", "), ".", + "\nPlease use a more specific/longer string for specifying the concept name or path,", + "or use the (full) concept path instead of the concept name to specify the concept.", + "(Hint: Concept paths can be found in the \'fullName\' column of the getConcepts() result).", sep = "")) + } + } + return(matching_indices) +} + \ No newline at end of file diff --git a/bin/installCommands.R b/bin/installCommands.R index 78394a6..467e66f 100644 --- a/bin/installCommands.R +++ b/bin/installCommands.R @@ -25,9 +25,9 @@ # Notes for first time installers: -# The package transmartRClient depends on five packages: RCurl, rjson, RProtoBuf, plyr, hash, and reshape. +# The package transmartRClient depends on seven packages: RCurl, rjson, RProtoBuf, plyr, hash, reshape and XML. # You can install them as follows: -install.packages(pkgs=c("RCurl", "rjson", "RProtoBuf", "plyr", "hash", "reshape")) +install.packages(pkgs=c("RCurl", "rjson", "RProtoBuf", "plyr", "hash", "reshape", "XML")) # RProtoBuf depends on the system protobuf headers. For Ubuntu you will need to # install the libprotoc-dev and libprotobuf-dev packages. diff --git a/man/getHighdimData.Rd b/man/getHighdimData.Rd index 5a6deac..6c2465c 100644 --- a/man/getHighdimData.Rd +++ b/man/getHighdimData.Rd @@ -104,7 +104,7 @@ If no projection is specified this function returns a list of the projections av \author{Tim Dorscheidt, Jan Kanis, Rianne Jansen. Contact: development@thehyve.nl} \note{To be able to access a transmart database, you need to be connected to the server the database is on. If you haven't connected to the server yet, establish a connection using the \code{\link{connectToTransmart}} function.} -\seealso{\code{\link{hash}, \link{highdimInfo}, \link{getStudies}, \link{getConcepts}}.} +\seealso{\code{\link{hash}, \link{highdimInfo}, \link{getStudies}, \link{getConcepts}}, \link{getPatientSetID}.} \examples{ \dontrun{ diff --git a/man/getObservations.Rd b/man/getObservations.Rd index 8deac55..3c9f9b3 100644 --- a/man/getObservations.Rd +++ b/man/getObservations.Rd @@ -31,7 +31,7 @@ getObservations(study.name, concept.match = NULL, concept.links = NULL, as.data. \references{} \author{Tim Dorscheidt, Jan Kanis, Rianne Jansen. Contact: development@thehyve.nl} \note{To be able to access a transmart database, you need to be connected to the server the database is on. If you haven't connected to the server yet, establish a connection using the \code{\link{connectToTransmart}} function.} -\seealso{\code{\link{getStudies}, \link{getConcepts}}} +\seealso{\code{\link{getStudies}, \link{getConcepts}, \link{getPatientSetID}}} \examples{ \dontrun{ # The following will retrieve a list with observations for the study "foo" diff --git a/man/getPatientSetID.Rd b/man/getPatientSetID.Rd new file mode 100644 index 0000000..ef7b80f --- /dev/null +++ b/man/getPatientSetID.Rd @@ -0,0 +1,112 @@ +\name{getPatientSetID} +\alias{getPatientSetID} + +\title{ +Define a patient set based on a series of constraints and retrieve the patient.set ID +} +\description{ +This function can be used to create a patient.set in tranSMART, based on a set of constraints, and it returns the ID of the created patient.set. This ID can be used in other functions, such as \code{\link{getObservations}} and \code{\link{getHighDimData}} to retrieve the data for only the patients that belong to that specific patient.set.\cr +The function returns a list with the ID of the newly created patient.set, the size of the created patient.set, the original user input specifying the constraints and the interpretation of that input, containing the constraints that were sent to tranSMART(called "finalQueryConstraints"). Optionally, the body of the POST request can be returned as well. This body contains the query definition in XML format as it is sent to tranSMART. +} +\usage{ +getPatientSetID(patientset.constraints, study.name, returnXMLquery = FALSE) +} + +\arguments{ + \item{study.name}{a character string giving the name of a study} + \item{patientset.constraints}{the definition of the patient.set constraints: an expression containing all criteria (constraints) that the patients in the patient.set have to meet, or an object containing one such an expression as created with function \code{\link{substitute}}. This expression can contain one or more constraints, concatenated by the "AND" (\code{&}) and OR (\code{|}) operators, and has to meet a strict format. \cr +A single constraint can either be: +\enumerate{ +\item a string with a reference to a concept; ie. a pattern/regular expression matching a concept name, or a partial/full concept path or link, as can be found in the \code{"fullName"} and \code{"api.link.self.href"} column of the concept table retrieved by \code{getConcepts("STUDY_NAME"))}. The pattern or string is used to match it to a concept name/path/link from the \code{getConcepts(study.name)} table and return the full concept path for the matching concept. + If only single node is supplied as a constraint, without constraint operator and constraint value, then any study concept (node) can be used, and even the values of categorical concepts. All patients with a (non-missing) value for that concept will be selected. If concept paths are used, then strings are taken literally, as in \code{\link{grep}} with \code{fixed = T}, meaning it is case sensitive and regular expressions cannot be used. +\item or a constraint of the format {reference to concept}{relational_operator}{constraint_value}, e.g. "age" < 65. All patients that meet the criterium are selected. In this case it is not possible to use just any concept: the concept has to be a numerical or categorical data node (end leaf). Selection on values of a high dimensional node is not supported. + } +References to concepts, and constraint_values can also be passed as an object (variable) with one element, being a string or a string/number respectively. E.g. if \"code{concepts = c("age", "sex", "ethnicity")} supplying \code{concepts[[1]] < 65} will also work. + +Multiple (sub)constraints can be combined into a larger constraint by use of the "AND" (\code{&}) and OR (\code{|}) operators. Note: this has to adhere to a strict format: if both "AND" and "OR" operators are used in a single patient.set constraints definition, the "AND" operator always has to be on the highest/outer level and the "OR" operator has to be on the lower/inner level. E.g. if constraints c1, c2, etc..., are concatenated with both "AND" and "OR" operators, then the whole constraint has to be of the format \code{X1 & X2 & X3}, with \code{Xi} being a concatenation of one or more (sub)constraints separated by the "OR" operator (and surrounded by brackets), eg. \code{Xi = (c1 | c2 | c3)} . A complete constraint, consisting of a concatenation of subconstraint, can for example be: \code{(c1 | c2) & (c3 | c4 | c5) & c6 & ...} or \code{c1}, \code{c1 | c2 | c3}, \code{c1 & c2}. \cr +Example: "sex"== "female" & ("age" > 65 | "blood_pressure" > 140) & ("diagnosis" == "diabetes" | "diagnosis" == "prediabetic") + + See also \code{\link{substitute}} for creating expressions in case you want to store expressions in a variable before calling getPatientSetID. E.g. \code{getPatientSetID("SOME STUDY", "age" < 65)}, will have the same result as: \code{my_expression <- substitute("age" < 65) ; getPatientSetID("SOME STUDY", my_expression)}. Alternatively, the constraints can also be given as a string (\code{getPatientSetID("SOME STUDY", "\"age\" < 65")} or \code{age_concept<- "age"; getPatientSetID("SOME STUDY", "age_concept < 65") }), but this might not be fully supported. If the constraints are supplied as a string, things that should be interpreted as text should be quoted, this applies for example to the concept names/paths/links and the categorical values (e.g. "\"age\" > 65"), else this will be interpreted as a variable name and the function will try to find a variable with that name in the global environment and use the value stored in that variable.} + \item{returnXMLquery}{If TRUE the body of the POST is request is part of the returned list. This contains the query definition in XML format.} + +} + +\details{ +If a constraint is of the form \{reference to concept\}\{relational_operator\}\{constraint_value\}, the following operations are possible: +\itemize{ +\item For numerical data nodes the relational_operations "<", ">", "<=",">=", "==" and "!=" can be used. +\item For categorical data nodes, only the "==" operation is possible. +} +some examples: \cr +correct format: \code{( "age" < 65 | "sex" == "Female") & "test" & ("test" == 1 | "test" == 2 | "test" == 3) } \cr +wrong format: \code{( "age" < 65 | "sex" == "female") & ("test" == 1 | ("test2" < 2 & "test3" == 3)) } (the & after '"test2" < 2' is on a lower/more inner level than the |) \cr +wrong format: \code{(( "age" < 65 | "sex" == "female") & ("test" == 1)) | "test" == 4 } (the | before the '"test4" == 4' is on a higher/more outer level than the &) +} + +\value{ +A list with the ID of the newly created patient.set, the size of the created patient.set, the original user input specifying the constraints and the interpretation of that input, containing the constraints that were sent to tranSMART and optionally the body of the POST request. +\item{patientsetID}{a numerical value containing the ID of the newly created patient.set. This patient.set ID can be used in other functions, such as \code{\link{getObservations}} and \code{\link{getHighDimData}} to retrieve the data for only the patients that belong to that specific patient.set} +\item{patientsetSize}{a numerical value specifying the number of patients in the created patient.set} +\item{input_patientset.constraints}{a character string containing the input by the user} +\item{finalQueryConstraints}{a character string representing the interpretation of the user input, containing the the constraints that were sent to tranSMART. This part of the output can be used to check if indeed the right concepts were selected, based on the input. For concepts the full concept path is given and relational operators are represented by text: "<" is represented by "LT", ">" by "GT", "<=" by "LE",">=" by "GE", "==" by "EQ" and "!=" by "NE". \crNote: the query constraints will always include a study concept path as well; this is added to ensure only patients from the supplied study are selected. Note 2: if a constraint was supplied for a categorical node in the form of \{concept\}\{relational_operator\}\{categorical value\}, e.g. "sex" == "female", only the path to the categorical value is represented (e.g. '\\Public Studies\\SOME STUDY\\Subjects\\Sex\\female').} +\item{xmlQuery}{a character string containing the body of the POST request that is sent to the tranSMART instance. This body contains the query definition in XML format as it is sent to tranSMART. This is only returned if returnXMLquery = T.} +} +\references{ +} +\author{ +Tim Dorscheidt, Jan Kanis, Rianne Jansen. +Contact: development@thehyve.nl} +\note{To be able to access a transmart database, you need to be connected to the server the database is on. If you haven't connected to the server yet, establish a connection using the \code{\link{connectToTransmart}} function.} + + +\seealso{ \code{\link{getHighdimData}} and \code{\link{getObservations}} } +\examples{ +\dontrun{ + # obtain a list of all studies in the database + studies <- getStudies() + + # the following call will give all concepts for GSE8581 + concepts <- getConcepts("GSE8581") + + ## create patient.set + + getPatientSetID("GSE8581", "age" < 65) + + #or: + + my_concepts <- c("Age", "Sex", "Lung Disease") + constraint_value <- 65 + getPatientSetID("GSE8581", my_concepts[[1]] < constraint_value) + + #multiple constraints can be combined: + getPatientSetID("GSE8581", "Age" < 65 & "Sex" == "female" & ("Lung Disease" == "chronic obstructive pulmonary disease" | + "Lung Disease" == "control")) + + + # there are multiple ways the patient.set constraints can be supplied. The following will have the same result: + # 1 as expression + getPatientSetID("GSE8581", "age" < 65) + + # 2 as an object (variable) containing a single expression + my_expression <- substitute("age"< 65) + getPatientSetID("GSE8581", my_expression) + + #3 as string. + # supplying concept name as string: + getPatientSetID("GSE8581", "\"age\" < 65") + # or if concept name is stored in an object (variable): + age_concept<- "age" + getPatientSetID("GSE8581", "age_concept < 65") + + + #retrieve clinical observation data for patient.set + + #retrieve high dim data for patient.set + + }. + + +} + +\keyword{ database } +\keyword{ transmart } diff --git a/man/transmartRClient-package.Rd b/man/transmartRClient-package.Rd index 9963469..da4e224 100644 --- a/man/transmartRClient-package.Rd +++ b/man/transmartRClient-package.Rd @@ -30,6 +30,8 @@ and high dimensional data (to which the user is authorized to access). \code{\link{getConcepts}}\cr \code{\link{getObservations}}\cr \code{\link{getHighdimData}}\cr + \code{\link{getPatientSetID}}\cr + } } From 98b66ccf58d3c3aeddc4faac1515d8bc591b4902 Mon Sep 17 00:00:00 2001 From: Rianne Jansen Date: Mon, 11 Apr 2016 18:53:37 +0200 Subject: [PATCH 02/26] :fish: --- R/getPatientSetID.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/getPatientSetID.R b/R/getPatientSetID.R index 97ff5d4..88d843f 100644 --- a/R/getPatientSetID.R +++ b/R/getPatientSetID.R @@ -685,4 +685,3 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = } return(matching_indices) } - \ No newline at end of file From 6c83f89bbc4dea4be0961abc3fceeeac2f0a4d75 Mon Sep 17 00:00:00 2001 From: Rianne Jansen Date: Tue, 12 Apr 2016 13:08:52 +0200 Subject: [PATCH 03/26] small change --- R/getPatientSetID.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/getPatientSetID.R b/R/getPatientSetID.R index 88d843f..f3af385 100644 --- a/R/getPatientSetID.R +++ b/R/getPatientSetID.R @@ -389,11 +389,11 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = stop(err_message) } if(length(tmpValue) >1){ - tryCatch(stop(paste("Incorrect input for constraint_value in (sub)constraint: ", - .expressionToText(patientsetConstraints), ".\nObject length of \'", constraint$value , - "\' is larger than 1.", - "Only a single input value (string/number) is allowed as a constraint_value.", - "\nInput for constraint_value: ", sep = "") , call. = F), finally = print(tmpValue)) + message("\nInput for constraint_value: ") + print(tmpValue) + stop(paste("Incorrect input for constraint_value in (sub)constraint: ", .expressionToText(patientsetConstraints), + ".\nObject length of \'", constraint$value , "\' is larger than 1.", + "Only a single input value (string/number) is allowed as a constraint_value.")) } constraint$value <- tmpValue } From d7bf1d9ae10d35dfeb1a59a0cfd0d7ae3968a3a4 Mon Sep 17 00:00:00 2001 From: Rianne Jansen Date: Tue, 12 Apr 2016 13:27:19 +0200 Subject: [PATCH 04/26] small change so that buildQueryXML can be tested --- R/getPatientSetID.R | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/R/getPatientSetID.R b/R/getPatientSetID.R index f3af385..c8a423b 100644 --- a/R/getPatientSetID.R +++ b/R/getPatientSetID.R @@ -36,7 +36,14 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = patientset.constraints <- .checkPatientSetConstraints(patientset.constraints) message("Processing input...", "") - xmlQuery <- .buildXMLquery(patientset.constraints, study.name) + # retrieve concept information for the given study, and only keep relevant columns. + # this will be used later to match the concepts supplied by the user as part of the constraint definition to concept + # paths. + studyConcepts <- getConcepts(study.name) + studyConcepts <- studyConcepts[, c("name", "fullName", "type", "api.link.self.href")] + studyConcepts <- .findEndLeaves(studyConcepts) + + xmlQuery <- .buildXMLquery(patientset.constraints, studyConcepts) # do POST request, and store result message("\nCreating patient set...", "") @@ -82,13 +89,7 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = # parse the constraints, and turn it into a query in XML format -.buildXMLquery <- function(patientset.constraints, study.name){ - # retrieve concept information for the given study, and only keep relevant columns. - # this will be used later to match the concepts supplied by the user as part of the constraint definition to concept - # paths. - studyConcepts <- getConcepts(study.name) - studyConcepts <- studyConcepts[, c("name", "fullName", "type", "api.link.self.href")] - studyConcepts <- .findEndLeaves(studyConcepts) +.buildXMLquery <- function(patientset.constraints, studyConcepts){ ## parse the expression containing the constraints and translate this into a query definition in XML format parsedConstraintsXMLlist <- .parsePatientSetConstraints(patientset.constraints, studyConcepts) From 5a3b162289e41fa5b0e159ed5954ac4ba65142d2 Mon Sep 17 00:00:00 2001 From: Rianne Jansen Date: Fri, 6 May 2016 16:36:36 +0200 Subject: [PATCH 05/26] some improvements to documentation and getPatientSetID --- R/getPatientSetID.R | 107 ++++++++++++++++++++--------------------- man/getPatientSetID.Rd | 40 +++++++-------- 2 files changed, 68 insertions(+), 79 deletions(-) diff --git a/R/getPatientSetID.R b/R/getPatientSetID.R index c8a423b..98b4c95 100644 --- a/R/getPatientSetID.R +++ b/R/getPatientSetID.R @@ -43,7 +43,11 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = studyConcepts <- studyConcepts[, c("name", "fullName", "type", "api.link.self.href")] studyConcepts <- .findEndLeaves(studyConcepts) + # read the constraints given by the user, and convert this to a XML query definition in the format as expected by REST-API xmlQuery <- .buildXMLquery(patientset.constraints, studyConcepts) + hrConstraints <- .makeHumanReadableQuery(xmlQuery) + xmlQuery <- saveXML(xmlQuery, prefix = '\n') #convert XML tree to string + if(getOption("verbose")) { message(xmlQuery) } # do POST request, and store result message("\nCreating patient set...", "") @@ -53,8 +57,6 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = #return patient.set ID patientsetID <- serverResult$id - hrConstraints <- .makeHumanReadableQuery(xmlQuery) - result <- list(patientsetID = patientsetID, patientsetSize = serverResult$setSize, input_patientset.constraints = .expressionToText(patientset.constraints), finalQueryConstraints = hrConstraints) @@ -108,12 +110,7 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = for(i in 1:length(parsedConstraintsXMLlist)){ xmlQuery <- append.XMLNode(xmlQuery, parsedConstraintsXMLlist[[i]]) } - - xmlQuery_asString <- saveXML(xmlQuery, prefix = '\n') - - if (getOption("verbose")) { message(xmlQuery_asString) } - - return(xmlQuery_asString) + return(xmlQuery) } @@ -287,60 +284,56 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = .makeHumanReadableQuery <- function(xmlQuery){ + parsedXML <- "" - xmlQuery <- strsplit(xmlQuery, "\n")[[1]] - panelCount <- 0 - itemCount <- 0 - invert <- "0" - for(i in 1:length(xmlQuery)){ - oneLine <- xmlQuery[[i]] + + if(all(names(xmlQuery)== "panel")){ + panels <- xmlChildren(xmlQuery) - if(grepl("",oneLine)){ - if(panelCount > 0 ){parsedXML <- paste(parsedXML, " & \n(", sep = "") - }else{parsedXML <- paste(parsedXML, "(", sep = "")} - panelCount <- panelCount + 1 - itemCount <- 0 - } - if(grepl("", oneLine)){ - invert <- .removeXMLmarkUp(oneLine, "invert") - invert <- gsub("[[:blank:]]", "",invert) - if(invert == "1"){ parsedXML <- paste(parsedXML, "!(", sep = "")} - } - if(grepl("",oneLine)){ - if(invert == "1"){ parsedXML <- paste(parsedXML, "))", sep = "") - }else{parsedXML <- paste(parsedXML, ")", sep = "")} - } - if(grepl("",oneLine)){ - if(itemCount > 0 ){parsedXML <- paste(parsedXML, " | ", sep = "")} - itemCount <- itemCount + 1 + for(i in 1:length(panels)){ + panel <- panels[[i]] + + if(i == 1){parsedXML <- paste(parsedXML, "(", sep = "") #first panel + }else{parsedXML <- paste(parsedXML, " & \n(", sep = "")} + + invert <- xmlValue(panel[["invert"]]) + if(invert == "1"){parsedXML <- paste(parsedXML, "!(", sep = "") } + + #add the children + items <- xmlElementsByTagName(panel, "item") + for(j in 1:length(items)){ + item <- items[[j]] + if(j > 1){parsedXML <- paste(parsedXML, " | ", sep = "")} + + #get concept path + item_key <- xmlValue(item[["item_key"]]) + concept_path <- gsub("\\\\\\\\Public Studies", "", item_key) + concept_path <- gsub("\\\\\\\\Private Studies", "", concept_path) + parsedXML <- paste(parsedXML, "\"", concept_path, "\"", sep = "") + + #if constraint operator and constraint value are given, get these + childNames <- names(item) + if(grepl("value_operator", childNames)){ + valueOperator <- xmlValue(item[["value_operator"]]) + parsedXML <- paste(parsedXML, " ", valueOperator, " ", sep = "") + } + if(grepl("value_constraint",childNames)){ + valueConstraint <- xmlValue(item[["value_constraint"]]) + parsedXML <- paste(parsedXML, " ", valueConstraint, sep = "") + } + } + + #close brackets for panel + if(invert == "1"){parsedXML <- paste(parsedXML, "))", sep = "") + }else{parsedXML <- paste(parsedXML, ")", sep = "") } } - if(grepl("",oneLine)){ - #get concept path - item_key <- .removeXMLmarkUp(oneLine, "item_key") - concept_path <- gsub("\\\\\\\\Public Studies", "", item_key) - concept_path <- gsub("\\\\\\\\Private Studies", "", concept_path) - parsedXML <- paste(parsedXML, "\"", concept_path, "\"", sep = "") - } - if(grepl("",oneLine)){ - valueOperator <- .removeXMLmarkUp(oneLine, "value_operator") - parsedXML <- paste(parsedXML, " ", valueOperator, " ", sep = "") - } - if(grepl("",oneLine)){ - valueConstraint <- .removeXMLmarkUp(oneLine, "value_constraint") - parsedXML <- paste(parsedXML, " ", valueConstraint, " ", sep = "") - } } + if(parsedXML == ""){warning("Something went wrong with making a human readable version of the XML. + This does not affect the formation of the patient set")} return(parsedXML) } -.removeXMLmarkUp <- function(string, markUpString){ - string <- gsub(paste("^[[:blank:]]*<", markUpString,">", sep = ""), "", string) - string <- gsub(paste("", sep = ""), "", string) - return(string) -} - - #just needs one conceptPath, can be of any of the concepts in the study. It can be any path in column 'fullName' .addStudyPanel <- function (constraintXMLlist, study.name, conceptPath){ # retrieve the path for the study concept, by taking only the first part of the supplied concept path up to and @@ -554,10 +547,12 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = stop(err_message) } if(length(result) >1){ - tryCatch(stop(paste("Incorrect input for concept specification in subconstraint: ", subconstraint, + write(paste("The content of object: \'", concept, "\' is:", sep = "" ),"") + print(result) + stop(paste("Incorrect input for concept specification in subconstraint: ", subconstraint, ".\nObject length of \'", concept , "\' is larger than 1. Only a single string is allowed for specifying the concept.", - "\nInput for concept: ", sep = "") , call. = F), finally = print(result)) + "The content of this concept variable is printed above this error message.", sep = "")) } concept <- result } diff --git a/man/getPatientSetID.Rd b/man/getPatientSetID.Rd index ef7b80f..3127c8d 100644 --- a/man/getPatientSetID.Rd +++ b/man/getPatientSetID.Rd @@ -5,50 +5,50 @@ Define a patient set based on a series of constraints and retrieve the patient.set ID } \description{ -This function can be used to create a patient.set in tranSMART, based on a set of constraints, and it returns the ID of the created patient.set. This ID can be used in other functions, such as \code{\link{getObservations}} and \code{\link{getHighDimData}} to retrieve the data for only the patients that belong to that specific patient.set.\cr -The function returns a list with the ID of the newly created patient.set, the size of the created patient.set, the original user input specifying the constraints and the interpretation of that input, containing the constraints that were sent to tranSMART(called "finalQueryConstraints"). Optionally, the body of the POST request can be returned as well. This body contains the query definition in XML format as it is sent to tranSMART. +This function can be used to create a patient.set in tranSMART based on a set of constraints, and it returns the ID of the created patient.set. This ID can be used in other functions, such as \code{\link{getObservations}} and \code{\link{getHighDimData}} to retrieve only the data for the patients that belong to that specific patient.set.\cr +The function returns a list with the ID of the newly created patient.set, the size of the patient.set, the original user input specifying the constraints and the interpretation of that input (ie.the constraints that were sent to tranSMART, called "finalQueryConstraints"). Optionally, the body of the POST request can be returned as well. This body contains the query definition in XML format as it is sent to tranSMART. } \usage{ -getPatientSetID(patientset.constraints, study.name, returnXMLquery = FALSE) +getPatientSetID(study.name, patientset.constraints, returnXMLquery = FALSE) } \arguments{ \item{study.name}{a character string giving the name of a study} - \item{patientset.constraints}{the definition of the patient.set constraints: an expression containing all criteria (constraints) that the patients in the patient.set have to meet, or an object containing one such an expression as created with function \code{\link{substitute}}. This expression can contain one or more constraints, concatenated by the "AND" (\code{&}) and OR (\code{|}) operators, and has to meet a strict format. \cr + \item{patientset.constraints}{the definition of the patient.set constraints: an expression containing all criteria (constraints) that the patients in the patient.set have to meet, or an object containing one such an expression as created with function \code{\link{substitute}}, or a string. This expression can contain one or more constraints, concatenated by the AND (\code{&}) and OR (\code{|}) operators, and it has to meet a strict format. \cr A single constraint can either be: \enumerate{ -\item a string with a reference to a concept; ie. a pattern/regular expression matching a concept name, or a partial/full concept path or link, as can be found in the \code{"fullName"} and \code{"api.link.self.href"} column of the concept table retrieved by \code{getConcepts("STUDY_NAME"))}. The pattern or string is used to match it to a concept name/path/link from the \code{getConcepts(study.name)} table and return the full concept path for the matching concept. - If only single node is supplied as a constraint, without constraint operator and constraint value, then any study concept (node) can be used, and even the values of categorical concepts. All patients with a (non-missing) value for that concept will be selected. If concept paths are used, then strings are taken literally, as in \code{\link{grep}} with \code{fixed = T}, meaning it is case sensitive and regular expressions cannot be used. -\item or a constraint of the format {reference to concept}{relational_operator}{constraint_value}, e.g. "age" < 65. All patients that meet the criterium are selected. In this case it is not possible to use just any concept: the concept has to be a numerical or categorical data node (end leaf). Selection on values of a high dimensional node is not supported. +\item a string with a reference to a concept. Ie. a pattern/regular expression matching a concept name, or a partial/full concept path or link, as can be found in the \code{"fullName"} and \code{"api.link.self.href"} column of the concept table retrieved by \code{getConcepts("STUDY_NAME"))}. The pattern or string is used to match it to a concept name/path/link in the \code{getConcepts(study.name)} table and to then return the full concept path for the matching concept. + If only single node is supplied as a constraint, without constraint operator and constraint value, then any study concept (node) can be used, and even the values of categorical concepts can be used. All patients with a (non-missing) value for that concept will be selected. If concept paths are used, then strings are taken literally, as in \code{\link{grep}} with \code{fixed = T}, meaning it is case sensitive and regular expressions cannot be used. +\item or a constraint of the format: \{string with reference to a concept (see point 1)\}\{relational_operator\}\{constraint_value\}, e.g. "age" < 65. All patients that meet the criterium are selected. In this case it is not possible to use just any concept: the concept has to be a numerical or categorical data node (end leaf). Selection on values of a high dimensional node is not supported. } -References to concepts, and constraint_values can also be passed as an object (variable) with one element, being a string or a string/number respectively. E.g. if \"code{concepts = c("age", "sex", "ethnicity")} supplying \code{concepts[[1]] < 65} will also work. +References to concepts and constraint_values can also be passed as an object (variable) with one element, being a string or a string/number respectively. E.g. if \code{concepts = c("age", "sex", "ethnicity")} supplying \code{concepts[[1]] < 65} will also work. -Multiple (sub)constraints can be combined into a larger constraint by use of the "AND" (\code{&}) and OR (\code{|}) operators. Note: this has to adhere to a strict format: if both "AND" and "OR" operators are used in a single patient.set constraints definition, the "AND" operator always has to be on the highest/outer level and the "OR" operator has to be on the lower/inner level. E.g. if constraints c1, c2, etc..., are concatenated with both "AND" and "OR" operators, then the whole constraint has to be of the format \code{X1 & X2 & X3}, with \code{Xi} being a concatenation of one or more (sub)constraints separated by the "OR" operator (and surrounded by brackets), eg. \code{Xi = (c1 | c2 | c3)} . A complete constraint, consisting of a concatenation of subconstraint, can for example be: \code{(c1 | c2) & (c3 | c4 | c5) & c6 & ...} or \code{c1}, \code{c1 | c2 | c3}, \code{c1 & c2}. \cr +Multiple (sub)constraints can be combined into a larger constraint by use of the AND (\code{&}) and OR (\code{|}) operators. Note: this has to adhere to a strict format: if both "AND" and "OR" operators are used in a single patient.set constraints definition, the "AND" operator always has to be on the highest(outer) level and the "OR" operator has to be on the lower(inner) level. E.g. if constraints c1, c2, etc..., are concatenated with both "AND" and "OR" operators, then the whole constraint has to be of the format \code{X1 & X2 & X3}, with \code{Xi} being a concatenation of one or more (sub)constraints separated by the "OR" operator (and surrounded by brackets), eg. \code{Xi = (c1 | c2 | c3)} . A complete constraint, consisting of a concatenation of subconstraints, could for example be: \code{(c1 | c2) & (c3 | c4 | c5) & c6 & ...}, or \code{c1}, or \code{c1 | c2 | c3}, \code{c1 & c2}. \cr Example: "sex"== "female" & ("age" > 65 | "blood_pressure" > 140) & ("diagnosis" == "diabetes" | "diagnosis" == "prediabetic") - See also \code{\link{substitute}} for creating expressions in case you want to store expressions in a variable before calling getPatientSetID. E.g. \code{getPatientSetID("SOME STUDY", "age" < 65)}, will have the same result as: \code{my_expression <- substitute("age" < 65) ; getPatientSetID("SOME STUDY", my_expression)}. Alternatively, the constraints can also be given as a string (\code{getPatientSetID("SOME STUDY", "\"age\" < 65")} or \code{age_concept<- "age"; getPatientSetID("SOME STUDY", "age_concept < 65") }), but this might not be fully supported. If the constraints are supplied as a string, things that should be interpreted as text should be quoted, this applies for example to the concept names/paths/links and the categorical values (e.g. "\"age\" > 65"), else this will be interpreted as a variable name and the function will try to find a variable with that name in the global environment and use the value stored in that variable.} + See also \code{\link{substitute}} for creating expressions in case you want to store the expressions in a variable before calling getPatientSetID. E.g. \code{getPatientSetID("SOME STUDY", "age" < 65)}, will have the same result as: \code{my_expression <- substitute("age" < 65) ; getPatientSetID("SOME STUDY", my_expression)}. Alternatively, the constraints can also be given as a string (\code{getPatientSetID("SOME STUDY", "\"age\" < 65")} or \code{age_concept <- "age"; getPatientSetID("SOME STUDY", "age_concept < 65") }), but this might not be fully supported. If the constraints are supplied as a single string, things that should be interpreted as text should be quoted. This applies for example to concept names/paths/links and the categorical values (e.g. "\"sex\" == \"Male\""). Else this will be interpreted as a variable name and the function will try to find a variable with that name in the global environment and use the value stored in that variable.} \item{returnXMLquery}{If TRUE the body of the POST is request is part of the returned list. This contains the query definition in XML format.} } \details{ -If a constraint is of the form \{reference to concept\}\{relational_operator\}\{constraint_value\}, the following operations are possible: +For a constraint of the form \{reference to concept\}\{relational_operator\}\{constraint_value\}, the following operations are possible: \itemize{ \item For numerical data nodes the relational_operations "<", ">", "<=",">=", "==" and "!=" can be used. \item For categorical data nodes, only the "==" operation is possible. } some examples: \cr -correct format: \code{( "age" < 65 | "sex" == "Female") & "test" & ("test" == 1 | "test" == 2 | "test" == 3) } \cr -wrong format: \code{( "age" < 65 | "sex" == "female") & ("test" == 1 | ("test2" < 2 & "test3" == 3)) } (the & after '"test2" < 2' is on a lower/more inner level than the |) \cr -wrong format: \code{(( "age" < 65 | "sex" == "female") & ("test" == 1)) | "test" == 4 } (the | before the '"test4" == 4' is on a higher/more outer level than the &) +correct format: \code{( "age" < 65 | "sex" == "Female") & "test" & ("test" == 1 | "test" == 2 | "test" == 3) } -- the & operators are on the outer levels, and the expressions with the | operators are on the inner levels and between brackets \cr +wrong format: \code{( "age" < 65 | "sex" == "female") & ("test" == 1 | ("test2" < 2 & "test3" == 3)) } --the & after '"test2" < 2' is on a lower/more inner level than the | \cr +wrong format: \code{(( "age" < 65 | "sex" == "female") & ("test" == 1)) | "test" == 4 } --the | before the '"test4" == 4' is on a higher/more outer level than the & } \value{ -A list with the ID of the newly created patient.set, the size of the created patient.set, the original user input specifying the constraints and the interpretation of that input, containing the constraints that were sent to tranSMART and optionally the body of the POST request. +A list with the ID of the newly created patient.set, the size of the patient.set, the original user input specifying the constraints and the interpretation of that input (ie.the constraints that were sent to tranSMART), and optionally the body of the POST request. \item{patientsetID}{a numerical value containing the ID of the newly created patient.set. This patient.set ID can be used in other functions, such as \code{\link{getObservations}} and \code{\link{getHighDimData}} to retrieve the data for only the patients that belong to that specific patient.set} \item{patientsetSize}{a numerical value specifying the number of patients in the created patient.set} \item{input_patientset.constraints}{a character string containing the input by the user} -\item{finalQueryConstraints}{a character string representing the interpretation of the user input, containing the the constraints that were sent to tranSMART. This part of the output can be used to check if indeed the right concepts were selected, based on the input. For concepts the full concept path is given and relational operators are represented by text: "<" is represented by "LT", ">" by "GT", "<=" by "LE",">=" by "GE", "==" by "EQ" and "!=" by "NE". \crNote: the query constraints will always include a study concept path as well; this is added to ensure only patients from the supplied study are selected. Note 2: if a constraint was supplied for a categorical node in the form of \{concept\}\{relational_operator\}\{categorical value\}, e.g. "sex" == "female", only the path to the categorical value is represented (e.g. '\\Public Studies\\SOME STUDY\\Subjects\\Sex\\female').} +\item{finalQueryConstraints}{a character string representing the interpretation of the user input, containing the the constraints that were sent to tranSMART. This part of the output can be used to check if indeed the right concepts were selected, based on the input. For concepts the full concept path is given and relational operators are represented by text: "<" is represented by "LT", ">" by "GT", "<=" by "LE",">=" by "GE", "==" by "EQ" and "!=" by "NE". \cr Note: the query constraints will always include a study concept path as well; this is added to ensure that only patients from the supplied study are selected. \cr Note 2: if a constraint was supplied for a categorical node in the form of \{concept\}\{relational_operator\}\{categorical value\}, e.g. "sex" == "female", only the path to the categorical value is represented (e.g. '\\Public Studies\\SOME STUDY\\Subjects\\Sex\\female').} \item{xmlQuery}{a character string containing the body of the POST request that is sent to the tranSMART instance. This body contains the query definition in XML format as it is sent to tranSMART. This is only returned if returnXMLquery = T.} } \references{ @@ -97,14 +97,8 @@ Contact: development@thehyve.nl} # or if concept name is stored in an object (variable): age_concept<- "age" getPatientSetID("GSE8581", "age_concept < 65") - - - #retrieve clinical observation data for patient.set - - #retrieve high dim data for patient.set - }. - + } } From 915049d01ed733f9c525de2d5c790d45908e448d Mon Sep 17 00:00:00 2001 From: Rianne Jansen Date: Fri, 6 May 2016 16:50:17 +0200 Subject: [PATCH 06/26] Maximal line width in documentation getPatientSetID --- man/getPatientSetID.Rd | 96 +++++++++++++++++++++++++++++++++--------- 1 file changed, 76 insertions(+), 20 deletions(-) diff --git a/man/getPatientSetID.Rd b/man/getPatientSetID.Rd index 3127c8d..dd81ad4 100644 --- a/man/getPatientSetID.Rd +++ b/man/getPatientSetID.Rd @@ -5,8 +5,13 @@ Define a patient set based on a series of constraints and retrieve the patient.set ID } \description{ -This function can be used to create a patient.set in tranSMART based on a set of constraints, and it returns the ID of the created patient.set. This ID can be used in other functions, such as \code{\link{getObservations}} and \code{\link{getHighDimData}} to retrieve only the data for the patients that belong to that specific patient.set.\cr -The function returns a list with the ID of the newly created patient.set, the size of the patient.set, the original user input specifying the constraints and the interpretation of that input (ie.the constraints that were sent to tranSMART, called "finalQueryConstraints"). Optionally, the body of the POST request can be returned as well. This body contains the query definition in XML format as it is sent to tranSMART. +This function can be used to create a patient.set in tranSMART based on a set of constraints, and it returns the ID of +the created patient.set. This ID can be used in other functions, such as \code{\link{getObservations}} and +\code{\link{getHighDimData}} to retrieve only the data for the patients that belong to that specific patient.set.\cr +The function returns a list with the ID of the newly created patient.set, the size of the patient.set, the original user +input specifying the constraints and the interpretation of that input (ie.the constraints that were sent to tranSMART, +called "finalQueryConstraints"). Optionally, the body of the POST request can be returned as well. This body contains +the query definition in XML format as it is sent to tranSMART. } \usage{ getPatientSetID(study.name, patientset.constraints, returnXMLquery = FALSE) @@ -14,49 +19,100 @@ getPatientSetID(study.name, patientset.constraints, returnXMLquery = FALSE) \arguments{ \item{study.name}{a character string giving the name of a study} - \item{patientset.constraints}{the definition of the patient.set constraints: an expression containing all criteria (constraints) that the patients in the patient.set have to meet, or an object containing one such an expression as created with function \code{\link{substitute}}, or a string. This expression can contain one or more constraints, concatenated by the AND (\code{&}) and OR (\code{|}) operators, and it has to meet a strict format. \cr + \item{patientset.constraints}{the definition of the patient.set constraints: an expression containing all criteria + (constraints) that the patients in the patient.set have to meet, or an object containing one such an expression as + created with function \code{\link{substitute}}, or a string. This expression can contain one or more constraints, + concatenated by the AND (\code{&}) and OR (\code{|}) operators, and it has to meet a strict format. \cr A single constraint can either be: \enumerate{ -\item a string with a reference to a concept. Ie. a pattern/regular expression matching a concept name, or a partial/full concept path or link, as can be found in the \code{"fullName"} and \code{"api.link.self.href"} column of the concept table retrieved by \code{getConcepts("STUDY_NAME"))}. The pattern or string is used to match it to a concept name/path/link in the \code{getConcepts(study.name)} table and to then return the full concept path for the matching concept. - If only single node is supplied as a constraint, without constraint operator and constraint value, then any study concept (node) can be used, and even the values of categorical concepts can be used. All patients with a (non-missing) value for that concept will be selected. If concept paths are used, then strings are taken literally, as in \code{\link{grep}} with \code{fixed = T}, meaning it is case sensitive and regular expressions cannot be used. -\item or a constraint of the format: \{string with reference to a concept (see point 1)\}\{relational_operator\}\{constraint_value\}, e.g. "age" < 65. All patients that meet the criterium are selected. In this case it is not possible to use just any concept: the concept has to be a numerical or categorical data node (end leaf). Selection on values of a high dimensional node is not supported. +\item a string with a reference to a concept. Ie. a pattern/regular expression matching a concept name, or a +partial/full concept path or link, as can be found in the \code{"fullName"} and \code{"api.link.self.href"} column of +the concept table retrieved by \code{getConcepts("STUDY_NAME"))}. The pattern or string is used to match it to a concept +name/path/link in the \code{getConcepts(study.name)} table and to then return the full concept path for the matching +concept. + If only single node is supplied as a constraint, without constraint operator and constraint value, then any study + concept (node) can be used, and even the values of categorical concepts can be used. All patients with a (non-missing) + value for that concept will be selected. If concept paths are used, then strings are taken literally, as in + \code{\link{grep}} with \code{fixed = T}, meaning it is case sensitive and regular expressions cannot be used. +\item or a constraint of the format: +\{string with reference to a concept (see point 1)\}\{relational_operator\}\{constraint_value\}, e.g. "age" < 65. All +patients that meet the criterium are selected. In this case it is not possible to use just any concept: the concept has +to be a numerical or categorical data node (end leaf). Selection on values of a high dimensional node is not supported. } -References to concepts and constraint_values can also be passed as an object (variable) with one element, being a string or a string/number respectively. E.g. if \code{concepts = c("age", "sex", "ethnicity")} supplying \code{concepts[[1]] < 65} will also work. +References to concepts and constraint_values can also be passed as an object (variable) with one element, being a string +or a string/number respectively. E.g. if \code{concepts = c("age", "sex", "ethnicity")} supplying +\code{concepts[[1]] < 65} will also work. -Multiple (sub)constraints can be combined into a larger constraint by use of the AND (\code{&}) and OR (\code{|}) operators. Note: this has to adhere to a strict format: if both "AND" and "OR" operators are used in a single patient.set constraints definition, the "AND" operator always has to be on the highest(outer) level and the "OR" operator has to be on the lower(inner) level. E.g. if constraints c1, c2, etc..., are concatenated with both "AND" and "OR" operators, then the whole constraint has to be of the format \code{X1 & X2 & X3}, with \code{Xi} being a concatenation of one or more (sub)constraints separated by the "OR" operator (and surrounded by brackets), eg. \code{Xi = (c1 | c2 | c3)} . A complete constraint, consisting of a concatenation of subconstraints, could for example be: \code{(c1 | c2) & (c3 | c4 | c5) & c6 & ...}, or \code{c1}, or \code{c1 | c2 | c3}, \code{c1 & c2}. \cr -Example: "sex"== "female" & ("age" > 65 | "blood_pressure" > 140) & ("diagnosis" == "diabetes" | "diagnosis" == "prediabetic") +Multiple (sub)constraints can be combined into a larger constraint by use of the AND (\code{&}) and OR (\code{|}) +operators. Note: this has to adhere to a strict format: if both "AND" and "OR" operators are used in a single +patient.set constraints definition, the "AND" operator always has to be on the highest(outer) level and the "OR" +operator has to be on the lower(inner) level. E.g. if constraints c1, c2, etc..., are concatenated with both "AND" and +"OR" operators, then the whole constraint has to be of the format \code{X1 & X2 & X3}, with \code{Xi} being a +concatenation of one or more (sub)constraints separated by the "OR" operator (and surrounded by brackets), eg. +\code{Xi = (c1 | c2 | c3)} . A complete constraint, consisting of a concatenation of subconstraints, could for example +be: \code{(c1 | c2) & (c3 | c4 | c5) & c6 & ...}, or \code{c1}, or \code{c1 | c2 | c3}, \code{c1 & c2}. \cr +Example: +"sex"== "female" & ("age" > 65 | "blood_pressure" > 140) & ("diagnosis" == "diabetes" | "diagnosis" == "prediabetic") - See also \code{\link{substitute}} for creating expressions in case you want to store the expressions in a variable before calling getPatientSetID. E.g. \code{getPatientSetID("SOME STUDY", "age" < 65)}, will have the same result as: \code{my_expression <- substitute("age" < 65) ; getPatientSetID("SOME STUDY", my_expression)}. Alternatively, the constraints can also be given as a string (\code{getPatientSetID("SOME STUDY", "\"age\" < 65")} or \code{age_concept <- "age"; getPatientSetID("SOME STUDY", "age_concept < 65") }), but this might not be fully supported. If the constraints are supplied as a single string, things that should be interpreted as text should be quoted. This applies for example to concept names/paths/links and the categorical values (e.g. "\"sex\" == \"Male\""). Else this will be interpreted as a variable name and the function will try to find a variable with that name in the global environment and use the value stored in that variable.} - \item{returnXMLquery}{If TRUE the body of the POST is request is part of the returned list. This contains the query definition in XML format.} + See also \code{\link{substitute}} for creating expressions in case you want to store the expressions in a variable + before calling getPatientSetID. E.g. \code{getPatientSetID("SOME STUDY", "age" < 65)}, will have the same result as: + \code{my_expression <- substitute("age" < 65) ; getPatientSetID("SOME STUDY", my_expression)}. Alternatively, the + constraints can also be given as a string (\code{getPatientSetID("SOME STUDY", "\"age\" < 65")} or + \code{age_concept <- "age"; getPatientSetID("SOME STUDY", "age_concept < 65") }), but this might not be fully + supported. If the constraints are supplied as a single string, things that should be interpreted as text should be + quoted. This applies for example to concept names/paths/links and the categorical values (e.g. "\"sex\" == \"Male\""). + Else this will be interpreted as a variable name and the function will try to find a variable with that name in the + global environment and use the value stored in that variable.} + \item{returnXMLquery}{If TRUE the body of the POST is request is part of the returned list. This contains the query + definition in XML format.} } \details{ -For a constraint of the form \{reference to concept\}\{relational_operator\}\{constraint_value\}, the following operations are possible: +For a constraint of the form \{reference to concept\}\{relational_operator\}\{constraint_value\}, the following +operations are possible: \itemize{ \item For numerical data nodes the relational_operations "<", ">", "<=",">=", "==" and "!=" can be used. \item For categorical data nodes, only the "==" operation is possible. } some examples: \cr -correct format: \code{( "age" < 65 | "sex" == "Female") & "test" & ("test" == 1 | "test" == 2 | "test" == 3) } -- the & operators are on the outer levels, and the expressions with the | operators are on the inner levels and between brackets \cr -wrong format: \code{( "age" < 65 | "sex" == "female") & ("test" == 1 | ("test2" < 2 & "test3" == 3)) } --the & after '"test2" < 2' is on a lower/more inner level than the | \cr -wrong format: \code{(( "age" < 65 | "sex" == "female") & ("test" == 1)) | "test" == 4 } --the | before the '"test4" == 4' is on a higher/more outer level than the & +correct format: \code{( "age" < 65 | "sex" == "Female") & "test" & ("test" == 1 | "test" == 2 | "test" == 3) } -- the +& operators are on the outer levels, and the expressions with the | operators are on the inner levels and between +brackets \cr +wrong format: \code{( "age" < 65 | "sex" == "female") & ("test" == 1 | ("test2" < 2 & "test3" == 3)) } --the & after +'"test2" < 2' is on a lower/more inner level than the | \cr +wrong format: \code{(( "age" < 65 | "sex" == "female") & ("test" == 1)) | "test" == 4 } --the | before the +'"test4" == 4' is on a higher/more outer level than the & } \value{ -A list with the ID of the newly created patient.set, the size of the patient.set, the original user input specifying the constraints and the interpretation of that input (ie.the constraints that were sent to tranSMART), and optionally the body of the POST request. -\item{patientsetID}{a numerical value containing the ID of the newly created patient.set. This patient.set ID can be used in other functions, such as \code{\link{getObservations}} and \code{\link{getHighDimData}} to retrieve the data for only the patients that belong to that specific patient.set} +A list with the ID of the newly created patient.set, the size of the patient.set, the original user input specifying the +constraints and the interpretation of that input (ie.the constraints that were sent to tranSMART), and optionally the +body of the POST request. +\item{patientsetID}{a numerical value containing the ID of the newly created patient.set. This patient.set ID can be +used in other functions, such as \code{\link{getObservations}} and \code{\link{getHighDimData}} to retrieve the data +for only the patients that belong to that specific patient.set} \item{patientsetSize}{a numerical value specifying the number of patients in the created patient.set} \item{input_patientset.constraints}{a character string containing the input by the user} -\item{finalQueryConstraints}{a character string representing the interpretation of the user input, containing the the constraints that were sent to tranSMART. This part of the output can be used to check if indeed the right concepts were selected, based on the input. For concepts the full concept path is given and relational operators are represented by text: "<" is represented by "LT", ">" by "GT", "<=" by "LE",">=" by "GE", "==" by "EQ" and "!=" by "NE". \cr Note: the query constraints will always include a study concept path as well; this is added to ensure that only patients from the supplied study are selected. \cr Note 2: if a constraint was supplied for a categorical node in the form of \{concept\}\{relational_operator\}\{categorical value\}, e.g. "sex" == "female", only the path to the categorical value is represented (e.g. '\\Public Studies\\SOME STUDY\\Subjects\\Sex\\female').} -\item{xmlQuery}{a character string containing the body of the POST request that is sent to the tranSMART instance. This body contains the query definition in XML format as it is sent to tranSMART. This is only returned if returnXMLquery = T.} +\item{finalQueryConstraints}{a character string representing the interpretation of the user input, containing the the +constraints that were sent to tranSMART. This part of the output can be used to check if indeed the right concepts were +selected, based on the input. For concepts the full concept path is given and relational operators are represented by +text: "<" is represented by "LT", ">" by "GT", "<=" by "LE",">=" by "GE", "==" by "EQ" and "!=" by "NE". \cr Note: the +query constraints will always include a study concept path as well; this is added to ensure that only patients from the +supplied study are selected. \cr Note 2: if a constraint was supplied for a categorical node in the form of +\{concept\}\{relational_operator\}\{categorical value\}, e.g. "sex" == "female", only the path to the categorical valu +e is represented (e.g. '\\Public Studies\\SOME STUDY\\Subjects\\Sex\\female').} +\item{xmlQuery}{a character string containing the body of the POST request that is sent to the tranSMART instance. +This body contains the query definition in XML format as it is sent to tranSMART. This is only returned if +returnXMLquery = T.} } \references{ } \author{ Tim Dorscheidt, Jan Kanis, Rianne Jansen. Contact: development@thehyve.nl} -\note{To be able to access a transmart database, you need to be connected to the server the database is on. If you haven't connected to the server yet, establish a connection using the \code{\link{connectToTransmart}} function.} +\note{To be able to access a transmart database, you need to be connected to the server the database is on. If you +haven't connected to the server yet, establish a connection using the \code{\link{connectToTransmart}} function.} \seealso{ \code{\link{getHighdimData}} and \code{\link{getObservations}} } From e5c896cc140209b7b242e07234c5637fc6086572 Mon Sep 17 00:00:00 2001 From: Jan Kanis Date: Mon, 9 May 2016 12:45:23 +0200 Subject: [PATCH 07/26] :fish:, NULL for missing values --- R/RClientConnectionManager.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/RClientConnectionManager.R b/R/RClientConnectionManager.R index 69c90a5..f1b6afd 100644 --- a/R/RClientConnectionManager.R +++ b/R/RClientConnectionManager.R @@ -264,15 +264,15 @@ function (oauthDomain = transmartClientEnv$transmartDomain, prefetched.request.t .serverMessageExchange <- function(apiCall, httpHeaderFields, accept.type = "default", progress = .make.progresscallback.download(), - post.content.type = "", requestBody = "") { + post.content.type = NULL, requestBody = NULL) { if (any(accept.type == c("default", "hal"))) { if (accept.type == "hal") { httpHeaderFields <- c(httpHeaderFields, Accept = "application/hal+json;charset=UTF-8") } curlOptions <- list() - if (post.content.type != ""){ - httpHeaderFields <- c(httpHeaderFields, 'content-type' = post.content.type) - if(requestBody == ""){ stop("Missing body for POST request")} - curlOptions[["postfields"]] <- requestBody - } + if (!is.null(post.content.type)) { + httpHeaderFields <- c(httpHeaderFields, 'content-type' = post.content.type) + if(is.null(requestBody)) { stop("Missing body for POST request") } + curlOptions[["postfields"]] <- requestBody + } headers <- basicHeaderGatherer() result <- list(JSON = FALSE) curlOptions <- c(curlOptions, list(httpheader = httpHeaderFields, headerfunction = headers$update)) From 18c46627b435610039caf883c18014d9e2dd8ea0 Mon Sep 17 00:00:00 2001 From: Rianne Jansen Date: Tue, 7 Jun 2016 14:05:50 +0200 Subject: [PATCH 08/26] add unit tests, some improvements and fixes to getPatientSetID --- R/getPatientSetID.R | 176 +++++++---- inst/unittests/resources/gse8581concepts.txt | 38 +++ inst/unittests/runitGetPatientSetID.R | 291 +++++++++++++++++++ man/getPatientSetID.Rd | 6 +- 4 files changed, 459 insertions(+), 52 deletions(-) create mode 100644 inst/unittests/resources/gse8581concepts.txt create mode 100644 inst/unittests/runitGetPatientSetID.R diff --git a/R/getPatientSetID.R b/R/getPatientSetID.R index 98b4c95..f4a3e62 100644 --- a/R/getPatientSetID.R +++ b/R/getPatientSetID.R @@ -30,12 +30,17 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = F){ if(missing(study.name)){stop("Provide study name")} if(missing(patientset.constraints)){stop("Provide patientset.constraints")} - + message("\nProcessing input...", "") + # retrieve the expression that defines the constraints - if(!is.call(patientset.constraints)){patientset.constraints <- substitute(patientset.constraints)} - patientset.constraints <- .checkPatientSetConstraints(patientset.constraints) + patientset.constraints <- substitute(patientset.constraints) #needs to be like this, with possible later evaluation in + # parsePatientsetConstraints because otherwise things such as "age"<65 & "biomarker data" + # will result in an error (problem is the string without operator) if you + # try e.g. is.call or is.character on the input + # if constraints are supplied as string, try to parse the string + patientset.constraints <- .checkPatientSetConstraints(patientset.constraints) + - message("Processing input...", "") # retrieve concept information for the given study, and only keep relevant columns. # this will be used later to match the concepts supplied by the user as part of the constraint definition to concept # paths. @@ -44,8 +49,8 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = studyConcepts <- .findEndLeaves(studyConcepts) # read the constraints given by the user, and convert this to a XML query definition in the format as expected by REST-API - xmlQuery <- .buildXMLquery(patientset.constraints, studyConcepts) - hrConstraints <- .makeHumanReadableQuery(xmlQuery) + xmlQuery <- .buildXMLquery(patientset.constraints, studyConcepts, study.name) + hrConstraints <- .makeSummaryOfQuery(xmlQuery) xmlQuery <- saveXML(xmlQuery, prefix = '\n') #convert XML tree to string if(getOption("verbose")) { message(xmlQuery) } @@ -61,7 +66,7 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = input_patientset.constraints = .expressionToText(patientset.constraints), finalQueryConstraints = hrConstraints) - message(paste("\nBased on the input, the following constraints were defined and sent to the server:\n", + message(paste("\nBased on the input, the following constraints were defined and sent to the server (always includes study concept):\n", result$finalQueryConstraints, sep = ""), "") if(returnXMLquery){result[["xmlQuery"]] <- xmlQuery} return(result) @@ -76,9 +81,13 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = The patient set constraints should be supplied in one single expression (or string).")} try({patientsetConstraintsParsed <- parse(text = patientsetConstraints)[[1]] - + if(length(patientsetConstraintsParsed) == 1){ + if(is.character(patientsetConstraintsParsed)){ #e.g. happens if input string is "\"age\"" + patientsetConstraints <- patientsetConstraintsParsed + } + } if(length(patientsetConstraintsParsed) > 1){ - message(paste("Detecting a string as input for patient set constraints - expected is an expression,", + message(paste("\nDetecting a string as input for patient set constraints - expected is an expression,", "such as: \"age\" > 65. \nWill attempt to parse the constraints out of the string, converting it", "into an expression...")) patientsetConstraints <- patientsetConstraintsParsed @@ -91,7 +100,7 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = # parse the constraints, and turn it into a query in XML format -.buildXMLquery <- function(patientset.constraints, studyConcepts){ +.buildXMLquery <- function(patientset.constraints, studyConcepts, study.name){ ## parse the expression containing the constraints and translate this into a query definition in XML format parsedConstraintsXMLlist <- .parsePatientSetConstraints(patientset.constraints, studyConcepts) @@ -150,9 +159,11 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = return(conceptListStudy) } + # parsePatientSetConstraints takes an expression defining the constraints for the patientset and returns # either a list of item XMLtrees or list of panel XMLtrees .parsePatientSetConstraints <- function(patientsetConstraints, studyConcepts){ + relationalOperators <- c("<", ">", "<=",">=", "==", "!=") logicalOperators <- c("&","&&", "|", "||") allowedOperators <- c(relationalOperators, logicalOperators) @@ -177,10 +188,20 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = # if length(patientsetConstraints) == 3, then the expression contains three elements, so it is either a low-level # constraint of the form {concept}{constraint_operator}{constraint_value} or it is a concatenation of constraints # separated by either an AND or OR operator (of form {some constraint(s)}{ &, &&, | or || }{some constraint(s)} ) - if(length(patientsetConstraints) == 3){ - constraintOperator <- as.character(patientsetConstraints[[1]]) + # alternatively it is an expression containing the call to substitute() or an object with index, e.g. variable[1], data.frame$firstColumn[firstRow],etc + # contain a string specifying a concept, or a string that in itself is a constraint definition, or an expression as + # created with subsitute for specifying a constraint . + + if(is.symbol(patientsetConstraints)){ + firstElement_in_allowedOperators <- F + }else{ + firstElement <- as.character(patientsetConstraints[[1]]) + firstElement_in_allowedOperators <- firstElement%in% allowedOperators + } + + if(length(patientsetConstraints) == 3 & firstElement_in_allowedOperators){ + constraintOperator <- firstElement - if(!constraintOperator%in% allowedOperators){stop(errorMsg)} constraint <- list() # in case where the (sub)constraint is a concatation of subconstraints, combined by an AND or OR operator @@ -201,8 +222,8 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = treeBeforeOperator <- .parsePatientSetConstraints(patientsetConstraints[[2]], studyConcepts) treeAfterOperator <- .parsePatientSetConstraints(patientsetConstraints[[3]], studyConcepts) - #if there is an "OR" operation inbetween two subconstraints, the whole constraint cannot have an & anymore - # (this is for forcing the strict format for constraint definition described above) + #if there is an "OR" operation inbetween two subconstraints, the combination of those two subconstraints cannot + # have an & anymore (this is for forcing the strict format for constraint definition described above) if(constraintOperator == "|" ) { if(grepl("&", .expressionToText(patientsetConstraints))){ stop(paste("Wrong format of (sub)constraint definition. Found in (sub)constraint: ", @@ -211,7 +232,7 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = "operator is on the highest level of the constraint definition \nand the | (or) operator on the ", "lowest (second) level, \nie. the format is 'x1' or (in case of multiple \'&\' operations) ", "'x1 & x2 & ...', \nwhere x1, x2, etc. can contain one or more subconstraints (called c here) ", - "separated by an | (or) operator, ie. x = c1 or x = c1 | c2 | ...,", + "separated by an | (or) operator, ie. x = c1 or x = (c1 | c2 | ...),", "\n where c is a single constraint such as \'\"age\" < 60\' or a reference to a concept.", "\n Examples of valid constraints: c1, c1|c2, c1&c2&c3, (c1|c2)&c3&(c4|c5|c6)", sep = "" )) @@ -245,12 +266,10 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = # element [[2]] contains the expression between the brackets, element [[1]] is '(' xmlTreeList <- .parsePatientSetConstraints(patientsetConstraints[[2]], studyConcepts) return(xmlTreeList) - }else if(length(patientsetConstraints) == 1) { + }else if(length(patientsetConstraints) == 1 & is.character(patientsetConstraints)) { # Then the (sub)constraint should consist of only a specification of a concept. # This will result in selection of all patients that have a value for this concept. # Concept specification can be a string containing a pattern to match to the concept name or a concept path or link, - # or an object (variable) that contains such a string (only single string). - #retrieve concept path conceptPath <- .getConstraintConcept(patientsetConstraints, patientsetConstraints, studyConcepts, identical.match = F, testIfEndLeave = F)[["conceptPath"]] @@ -258,6 +277,51 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = # make itemTree for that concept itemXMLlist <- list(xmlNode("item", xmlNode("item_key", .makeItemKey(conceptPath)))) return(itemXMLlist) + }else if(class(patientsetConstraints) == "name" | class(patientsetConstraints) == "call"){ + # alternatively it is an expression containing a call to substite or an object (with or without index), + # e.g. variables[1] or variable, data.frame$firstColumn[firstRow],etc - this object can + # contain a string specifying a concept, or a string that in itself is a constraint definition, or an expression as + # created with subsitute for specifying a constraint . + # e.g. when following input is given: concepts<- c("age", "sex"); getPatientSetID("Some study", concepts[1]) or + # tmp <- c(substitute("age" <65), substitute("sex"== "female")); getPatientSetID("Some study", tmp[[1]]) + # or an object with strings specifying the constraints, and then the strings shoudl be turned into expressions too. + # tmp <- c("\"age\" <65", "\"sex\"== \"female\""); getPatientSetID("Some study", tmp[1]) + # try to evaluate the expression and find a matching constraint concept + result <- try(eval(patientsetConstraints, envir = globalenv()), silent = T) + if(class(result) == "try-error"){ + + stop(paste(attr(result, "condition")$message, "\n",errorMsg, sep = "")) + } + if(is.list(result)) + { + if(length(result) > 1){ + stop(paste("Incorrect input for patient set constraints.\n", + "Evaluation of input", .expressionToText(patientsetConstraints), + "results in a list with more than one element", + "while the function expects only a single string or a single expression", + "(as created with function substitute), not multiple.")) + } + if(length(result) == 1){ + warning(paste("Evaluation of input", .expressionToText(patientsetConstraints), + "results in a list with a single element.", + "Expected is a string or an expression (as created with function substitute).", + "Will try to use this single element in the list")) + result <- result[[1]] + } + } + if(length(result)==1) + { + if(is.na(result)) + { + stop(paste("Content of \'",.expressionToText(patientsetConstraints), "\' is 'NA'.", + " Cannot use 'NA' as constraint definition/concept specification.", sep = "")) + } + } + result <- .checkPatientSetConstraints(result) #parses constraint definition out of string, if applicable + patientsetConstraints <- result + + xmlTreeList <- .parsePatientSetConstraints(patientsetConstraints, studyConcepts) + return(xmlTreeList) }else{ stop(errorMsg) } @@ -274,7 +338,7 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = textExpressionPasted <- paste(textExpressionPasted, collapse = "") #warnings are truncated, so it doesn't necessarily print all in case of long textExpression - message(paste("While trying to convert an expression to text, the deparse function cut an expression in two.", + message(paste("\nWhile trying to convert an expression to text, the deparse function cut an expression in two.", "\nSeparate parts:\n", paste("\t", textExpression, collapse = "\n AND \n"), "\nThese are pasted again together. Result:\n ", textExpressionPasted)) textExpression <- textExpressionPasted @@ -283,7 +347,7 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = } -.makeHumanReadableQuery <- function(xmlQuery){ +.makeSummaryOfQuery <- function(xmlQuery){ parsedXML <- "" @@ -293,11 +357,11 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = for(i in 1:length(panels)){ panel <- panels[[i]] - if(i == 1){parsedXML <- paste(parsedXML, "(", sep = "") #first panel - }else{parsedXML <- paste(parsedXML, " & \n(", sep = "")} + if(i == 1){parsedXML <- paste(parsedXML, "( ", sep = "") #first panel + }else{parsedXML <- paste(parsedXML, "\n\n\t&\n\n( ", sep = "")} invert <- xmlValue(panel[["invert"]]) - if(invert == "1"){parsedXML <- paste(parsedXML, "!(", sep = "") } + if(invert == "1"){parsedXML <- paste(parsedXML, "!( ", sep = "") } #add the children items <- xmlElementsByTagName(panel, "item") @@ -313,19 +377,19 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = #if constraint operator and constraint value are given, get these childNames <- names(item) - if(grepl("value_operator", childNames)){ - valueOperator <- xmlValue(item[["value_operator"]]) + if("constrain_by_value" %in% childNames){ + valueConstraints <- item[["constrain_by_value"]] + valueOperator <- xmlValue(valueConstraints[["value_operator"]]) parsedXML <- paste(parsedXML, " ", valueOperator, " ", sep = "") + valueConstraint <- xmlValue(valueConstraints[["value_constraint"]]) + parsedXML <- paste(parsedXML, " ", valueConstraint, " ", sep = "") + } - if(grepl("value_constraint",childNames)){ - valueConstraint <- xmlValue(item[["value_constraint"]]) - parsedXML <- paste(parsedXML, " ", valueConstraint, sep = "") - } } #close brackets for panel - if(invert == "1"){parsedXML <- paste(parsedXML, "))", sep = "") - }else{parsedXML <- paste(parsedXML, ")", sep = "") } + if(invert == "1"){parsedXML <- paste(parsedXML, " ))", sep = "") + }else{parsedXML <- paste(parsedXML, " )", sep = "") } } } if(parsedXML == ""){warning("Something went wrong with making a human readable version of the XML. @@ -374,7 +438,7 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = constraint$concept <- patientsetConstraints[[2]] constraint$value <- patientsetConstraints[[3]] - if(class(constraint$value) == "name"){ + if(class(constraint$value) == "name" | class(constraint$value) == "call"){ tmpValue <- try(eval(constraint$value, envir = globalenv()), silent = T) if(class(tmpValue) == "try-error"){ try_error <- attr(tmpValue, "condition")$message @@ -389,6 +453,10 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = ".\nObject length of \'", constraint$value , "\' is larger than 1.", "Only a single input value (string/number) is allowed as a constraint_value.")) } + if(is.na(tmpValue)){ + stop(paste("Content of \'",.expressionToText(constraint$value), "\' is 'NA'.", + " A constraint value cannot be a missing value.", sep = "")) + } constraint$value <- tmpValue } @@ -471,9 +539,9 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = # you cannot apply relational operations to the high dimensional node stop(paste("Incorrect use of a high dimensional data node in (sub)constraint: ", .expressionToText(patientsetConstraints),".", - "\nYou can only use high dimensional nodes for defining patient sets by supplying the node name ", - "alone (e.g. \"mRNA day1\"); you cannot apply a relational operation (such as \"mRNA day1 < 0\")", - "to the node. \nIf you supply the high dimensional node name, ", + "\nHigh dimensional nodes can only be used for defining patient sets by supplying the node name ", + "alone (e.g. \"mRNA day1\"); it is not possible to apply a relational operation (such as \"mRNA day1 < 0\")", + " to the node. \nWhen a high dimensional node name is supplied, ", "all patients that have data for that high dimensional node will be selected.", sep = "")) } @@ -539,7 +607,7 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = subconstraint <- .expressionToText(subconstraint) #if not string: get the value of the variable/object. Value should be one string. - if(class(concept) == "name"){ + if(class(concept) == "name" | class(concept) == "call"){ result <- try(eval(concept, envir = globalenv()), silent = T) if(class(result) == "try-error"){ try_error <- attr(result, "condition")$message @@ -547,13 +615,17 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = stop(err_message) } if(length(result) >1){ - write(paste("The content of object: \'", concept, "\' is:", sep = "" ),"") + write(paste("The content of object: \'", .expressionToText(concept), "\' is:", sep = "" ),"") print(result) stop(paste("Incorrect input for concept specification in subconstraint: ", subconstraint, - ".\nObject length of \'", concept , + ".\nObject length of \'", .expressionToText(concept), "\' is larger than 1. Only a single string is allowed for specifying the concept.", - "The content of this concept variable is printed above this error message.", sep = "")) + "The content of this variable is printed above this error message.", sep = "")) } + if(is.na(result)){ + stop(paste("Content of \'",.expressionToText(concept), "\' is 'NA'.", + " Cannot use 'NA' as concept specification.", sep = "")) + } concept <- result } #concept should be a string. @@ -603,7 +675,7 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = } if(is.concept.link){ - message("Detecting a concept.link. Will attempt to find matching concept path.") + message("\nDetecting a concept.link. Will attempt to find matching concept path.") conceptMatch <- grep(concept, studyConcepts$api.link.self.href) if(length(conceptMatch) > 1){ conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, @@ -615,11 +687,11 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = identicalM <- "" if(identical.match){identicalM <- "identical(literal) "} if(length(conceptMatch) == 0){ - stop(paste("No ", identicalM, "match found for concept or categorical value \'", orig_concept, - "\', found in subconstraint: ", subconstraint, + stop(paste("No ", identicalM, "matching concept or categorical value found in this study for \'", + orig_concept, "\', found in subconstraint: ", subconstraint, "\nNote: The supplied concept in the constraint definition can be a full or partial ", - "match to the concept name (and can even contain regular expressions: pattern matching will be done as", - " done for the grep function, ignoring case), or it can be a concept.link or a concept.path.", + "match to the concept name, and can even contain regular expressions (pattern matching will be done as", + " done in the grep function, ignoring case) or it can be a concept.link or a concept.path.", "\nIn case of a categorical concept; the value part of the constraint has to be a literal match to one", " of the possible categorical values for that concept." , sep = "")) } @@ -628,18 +700,18 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = # If constraints are supplied in the form of {concept}{operator}{constraint_value}, the concept should be an end leave # (ie. data node), either categorical or numerical, and if it's categorical it should be an end leave and not a # categorical value. If only a concept is supplied as a constraint, it is possible to also use other concepts that - # are not end leaves - in that case testIfEndLeave should be FALSE. + # are not end leaves, and high dimensional data nodes - in that case testIfEndLeave should be FALSE. is.endLeaf <- studyConcepts$endLeaf[conceptMatch] == "YES" if(!is.endLeaf & testIfEndLeave){ stop(paste("The supplied concept \'", concept, "\' is not a data node (ie. not an end leaf of the transmart tree).", - "The supplied concept name/path/link must point to a single numerical, categorical or high dimensional", + "The supplied concept name/path/link must point to a single numerical or categorical", " data node (end leaf).", sep = "")) } matched_concept = list(conceptPath = studyConcepts$fullName[conceptMatch], conceptType = studyConcepts$type[conceptMatch]) - message(paste("Matched the concept \'", orig_concept, "\' in subconstraint \'", subconstraint, + message(paste("\nMatched the concept \'", orig_concept, "\' in subconstraint \'", subconstraint, "\'\n to concept (full path): \'", matched_concept$conceptPath, "\'\n", sep = "") ) return(matched_concept) } @@ -656,10 +728,12 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = "\nPlease use the concept path instead of the concept name to specify the concept.", "(Hint: Concept paths can be found in the \'fullName\' column of the getConcepts() result).", sep = "")) } - message(paste("Multiple matching concepts found for the string \'", concept,"\'. One identical match was found: \'", + message(paste("\nMultiple matching concepts found for the string \'", concept, + "\'. One identical match was found (ignoring case): \'", concept_list[matching_indices], "\'.\nThis match is selected.", "\nFor more precise matching use full-length concept names, paths, or links,", - " and/or include beginning/end of string symbols (^/$) - see ?regexp", sep = "")) + " and/or include beginning/end of string symbols (^/$) - see ?regexp.", + "Note: regexp can only be used for specifying concept names or links, not paths",sep = "")) } #if not literal match take the shortest match @@ -667,7 +741,7 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = paths_tmp<- concept_list[matching_indices] shortest_match<- matching_indices[which.min(nchar(paths_tmp))] matching_indices<- shortest_match - message(paste("Multiple matching concepts found for the string \'", concept,"\', selecting shortest match: \'", + message(paste("\nMultiple matching concepts found for the string \'", concept,"\', selecting shortest match: \'", paste(concept_list[shortest_match], collapse = ","), "\'.", "\nFor more precise matching use full-length names or paths,", " and/or include beginning/end of string symbols (^/$) - see ?regexp", sep = "")) diff --git a/inst/unittests/resources/gse8581concepts.txt b/inst/unittests/resources/gse8581concepts.txt new file mode 100644 index 0000000..03b5fb4 --- /dev/null +++ b/inst/unittests/resources/gse8581concepts.txt @@ -0,0 +1,38 @@ +name fullName type api.link.self.href endLeaf +Afro American \Public Studies\GSE8581\Subjects\Ethnicity\Afro American\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Ethnicity/Afro%20American NO +Age (year) \Public Studies\GSE8581\Subjects\Age (year)\ NUMERIC /studies/gse8581/concepts/Subjects/Age%20%28year%29 YES +Biomarker_Data \Public Studies\GSE8581\MRNA\Biomarker_Data\ UNKNOWN /studies/gse8581/concepts/MRNA/Biomarker_Data NO +carcinoid \Public Studies\GSE8581\Endpoints\Diagnosis\carcinoid\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/carcinoid NO +Caucasian \Public Studies\GSE8581\Subjects\Ethnicity\Caucasian\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Ethnicity/Caucasian NO +chronic obstructive pulmonary disease \Public Studies\GSE8581\Subjects\Lung Disease\chronic obstructive pulmonary disease\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Lung%20Disease/chronic%20obstructive%20pulmonary%20disease NO +control \Public Studies\GSE8581\Subjects\Lung Disease\control\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Lung%20Disease/control NO +Diagnosis \Public Studies\GSE8581\Endpoints\Diagnosis\ CATEGORICAL_NODE /studies/gse8581/concepts/Endpoints/Diagnosis YES +emphysema \Public Studies\GSE8581\Endpoints\Diagnosis\emphysema\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/emphysema NO +Endpoints \Public Studies\GSE8581\Endpoints\ UNKNOWN /studies/gse8581/concepts/Endpoints NO +Ethnicity \Public Studies\GSE8581\Subjects\Ethnicity\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Ethnicity YES +female \Public Studies\GSE8581\Subjects\Sex\female\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Sex/female NO +FEV1 \Public Studies\GSE8581\Endpoints\FEV1\ NUMERIC /studies/gse8581/concepts/Endpoints/FEV1 YES +Forced Expiratory Volume Ratio \Public Studies\GSE8581\Endpoints\Forced Expiratory Volume Ratio\ NUMERIC /studies/gse8581/concepts/Endpoints/Forced%20Expiratory%20Volume%20Ratio YES +giant bullae \Public Studies\GSE8581\Endpoints\Diagnosis\giant bullae\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/giant%20bullae NO +Giant Cell Tumor \Public Studies\GSE8581\Endpoints\Diagnosis\Giant Cell Tumor\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/Giant%20Cell%20Tumor NO +GPL570_BOGUS \Public Studies\GSE8581\MRNA\Biomarker_Data\GPL570_BOGUS\ UNKNOWN /studies/gse8581/concepts/MRNA/Biomarker_Data/GPL570_BOGUS NO +Height (inch) \Public Studies\GSE8581\Subjects\Height (inch)\ NUMERIC /studies/gse8581/concepts/Subjects/Height%20%28inch%29 YES +hematoma \Public Studies\GSE8581\Endpoints\Diagnosis\hematoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/hematoma NO +Homo sapiens \Public Studies\GSE8581\Subjects\Organism\Homo sapiens\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Organism/Homo%20sapiens NO +inflammation \Public Studies\GSE8581\Endpoints\Diagnosis\inflammation\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/inflammation NO +Lung \Public Studies\GSE8581\MRNA\Biomarker_Data\GPL570_BOGUS\Lung\ HIGH_DIMENSIONAL /studies/gse8581/concepts/MRNA/Biomarker_Data/GPL570_BOGUS/Lung YES +Lung Disease \Public Studies\GSE8581\Subjects\Lung Disease\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Lung%20Disease YES +lymphoma \Public Studies\GSE8581\Endpoints\Diagnosis\lymphoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/lymphoma NO +male \Public Studies\GSE8581\Subjects\Sex\male\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Sex/male NO +metastatic non-small cell adenocarcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\metastatic non-small cell adenocarcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/metastatic%20non-small%20cell%20adenocarcinoma NO +metastatic renal cell carcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\metastatic renal cell carcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/metastatic%20renal%20cell%20carcinoma NO +MRNA \Public Studies\GSE8581\MRNA\ UNKNOWN /studies/gse8581/concepts/MRNA NO +no malignancy \Public Studies\GSE8581\Endpoints\Diagnosis\no malignancy\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/no%20malignancy NO +non-small cell adenocarcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\non-small cell adenocarcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/non-small%20cell%20adenocarcinoma NO +non-small cell squamous cell carcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\non-small cell squamous cell carcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/non-small%20cell%20squamous%20cell%20carcinoma NO +not specified \Public Studies\GSE8581\Subjects\Lung Disease\not specified\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Lung%20Disease/not%20specified NO +NSC-Mixed \Public Studies\GSE8581\Endpoints\Diagnosis\NSC-Mixed\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/NSC-Mixed NO +Organism \Public Studies\GSE8581\Subjects\Organism\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Organism YES +Sex \Public Studies\GSE8581\Subjects\Sex\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Sex YES +Subjects \Public Studies\GSE8581\Subjects\ UNKNOWN /studies/gse8581/concepts/Subjects NO +Unknown \Public Studies\GSE8581\Endpoints\Diagnosis\Unknown\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/Unknown NO diff --git a/inst/unittests/runitGetPatientSetID.R b/inst/unittests/runitGetPatientSetID.R new file mode 100644 index 0000000..743c1be --- /dev/null +++ b/inst/unittests/runitGetPatientSetID.R @@ -0,0 +1,291 @@ +# Copyright 2014, 2015, 2016 The Hyve B.V. +# Copyright 2014 Janssen Research & Development, LLC. +# +# This file is part of tranSMART R Client: R package allowing access to +# tranSMART's data via its RESTful API. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version, along with the following terms: +# +# 1. You may convey a work based on this program in accordance with +# section 5, provided that you retain the above notices. +# 2. You may convey verbatim copies of this program code as you receive +# it, in any medium, provided that you retain the above notices. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . + +# concepts table for GSE8581 +gse8581conceptsLocation <- + system.file("unittests/resources/gse8581concepts.txt", package="transmartRClient") + +gseconcepts <- read.table(gse8581conceptsLocation, header = T, stringsAsFactors = F, sep = "\t") + +### unit tests for function .checkPatientSetConstraints ### +# this should convert a string to an expression, if the constraints are +# provided as a string. Also checks that it only contains one string +#both "\"age\"" and "age" as input should result in returning "age" # suggest to use variable name without the quotes if variable with name age should be used +test.checkPatientSetConstraints.simpleString.1 <- function() { + result<- transmartRClient:::.checkPatientSetConstraints("\"age\"") + checkEquals("age", result) +} + +test.checkPatientSetConstraints.simpleString.2 <- function() { + result<- transmartRClient:::.checkPatientSetConstraints("age") + checkEquals("age", result) +} + +test.checkPatientSetConstraints.object <- function() { + result<- transmartRClient:::.checkPatientSetConstraints("concepts[1]") + expected <- substitute(concepts[1]) + checkEquals(expected, result) +} + +test.checkPatientSetConstraints.constraintDefinitionString <- function() { + result<- transmartRClient:::.checkPatientSetConstraints("\"age\" < 60") + expected <- substitute("age" < 60) + checkEquals(expected, result) +} + +#should only work with a single string +test.checkPatientSetConstraints.constraintDefinitionMultipleStrings <- function() { + input <- c("\"age\" < 60", "Male") + checkException(transmartRClient:::.checkPatientSetConstraints(input)) +} + +test.checkPatientSetConstraints.constraintDefinitionExpression <- function() { + result<- transmartRClient:::.checkPatientSetConstraints(substitute("age" < 60)) + expected <- substitute("age" < 60) + checkEquals(expected, result) +} + + +### unit test for function .buildXMLquery(patientset.constraints, studyConcepts) ### +## supplying a single concept only, without constraint operator and constraint value" + +# supplying a concept that is data node +test.buildXMLquery.datanode <- function() { + result <- transmartRClient:::.buildXMLquery("Age", gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#concept that is not a data node +test.buildXMLquery.nonDataNode <- function() { + result <- transmartRClient:::.buildXMLquery("Subjects", gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +# categorical value (end leaf) +test.buildXMLquery.categoricalValue <- function() { + result <- transmartRClient:::.buildXMLquery("control", gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Lung Disease\\control\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +# non-existing data node (a string that does nat match any concepts) +test.buildXMLquery.notExistingConcept <- function() { + checkException( transmartRClient:::.buildXMLquery("Nonsense", gseconcepts, "GSE8581")) +} + +#concept link +test.buildXMLquery.conceptLink<- function() { + input<-substitute("/studies/gse8581/concepts/Subjects/Age%20%28year%29") + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +# concept path. +test.buildXMLquery.conceptPath <- function() { + input<-substitute("\\Public Studies\\GSE8581\\Subjects\\Age (year)\\") + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +## constraints consisting of concept with constraint value +# simple constraint, consisting of only one concept plus constraint value +test.buildXMLquery.simpleConstraint<- function() { + input <- substitute("age" < 65) + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +# non-existing value for categorical concept +test.buildXMLquery.nonExistingCategoricalValue <- function() { + input <- substitute("sex" == "unknown") + checkException(transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581")) +} + +#relational operation shouldn't work with high dim node +test.buildXMLquery.relationalOperationHighDimNode<- function() { + input <- substitute("lung" < 65) + checkException(transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581")) +} + +#relational operation shouldn't work with a concept that is not a datanode +test.buildXMLquery.relationalOperationNonDataNode<- function() { + input <- substitute("Subjects" < 65) + checkException(transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581")) +} + +#compound constraint | +test.buildXMLquery.compoundConstraintsORed<- function() { + input <- substitute("age" < 65 | "sex" == "female") + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Sex\\female\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#compound constraint & +test.buildXMLquery.compoundConstraintsANDed<- function() { + input <- substitute("age" < 65 & "sex" == "female") + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Sex\\female\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#compound constraint complex +test.buildXMLquery.compoundConstraintsComplex<- function() { + input <- substitute("age" < 65 & + ("lung disease" == "control" | "lung disease" == "chronic obstructive pulmonary disease") & + "Biomarker_Data") + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Lung Disease\\control\\\n \n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Lung Disease\\chronic obstructive pulmonary disease\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\MRNA\\Biomarker_Data\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#compound constraint complex, wrong format +test.buildXMLquery.compoundConstraintsComplexWrongFormat<- function() { + input <- substitute("sex"== "female" | ("age" < 65 & "Biomarker_Data")) + checkException(transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581")) +} + +#compound constraint complex, wrong format (no brackets around ORed part) +test.buildXMLquery.compoundConstraintsComplexWrongFormat2<- function() { + input <- substitute("sex"== "female" & "age" < 65 | "Biomarker_Data") + checkException(transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581")) +} + +# concept and constraint value should also be possible to be stored in an object +test.buildXMLquery.simpleConstraintContainingObjects<- function() { + concepts <- c("age", "sex") + some_value <- 65 + input <- substitute(concepts[1] < some_value) + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#constraints with double substitute (in getPatientSetID another substitute is performed on the input, so if input is +# substitute("age"<65), then input for buildXMLquery is substitute(substitute("age"<65)) +test.buildXMLquery.doubleSubstitute<- function() { + concepts <- c("age", "sex") + some_value <- 65 + input <- substitute(substitute(concepts[1] < some_value[1])) + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#constraints as string in object +test.buildXMLquery.stringInObject<- function() { + concepts <- c("age", "sex") + assign("concepts", concepts, envir = .GlobalEnv) + constraint <- "concepts[1] < 65" + input <- substitute(constraint[1]) + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#object name without index (this is handled differently than objects with index as the class differs) +test.buildXMLquery.objectWithoutIndex<- function() { + constraint <- "age" + input <- substitute(constraint) + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + +#NA instead of constraint +test.buildXMLquery.objectWithNA<- function() { + constraints <- "age" + input <- substitute(constraints[2]) + checkException(transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581")) +} + +#NA concept +test.buildXMLquery.constraintWithNAConcept<- function() { + constraints <- "age" + input <- substitute(constraints[2] < 65) + checkException(transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581")) +} + +test.buildXMLquery.objectWithSubstitute<- function() { + tmp <- c(substitute("age" <65), substitute("sex"== "female")) + input <- substitute(tmp[1]) + result <- transmartRClient:::.buildXMLquery(input, gseconcepts, "GSE8581") + xmlQueryText <- saveXML(result, prefix = '\n') #convert XML tree to string + expected <- "\n\n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\Subjects\\Age (year)\\\n \n LT\n 65\n NUMBER\n \n \n \n \n 0\n \n \\\\Public Studies\\Public Studies\\GSE8581\\\n \n \n" + checkIdentical(expected, xmlQueryText) +} + + + + +#may also be useful for testing (need to connect to a database that has both the clinical and high dim data of gse8581): +# getPatientSetID("GSE8581", "Age") #works on transmart-dev. +# getPatientSetID("GSE8581", "Subjects") #works on transmart-dev. +# getPatientSetID("GSE8581", "control") #works on transmart-dev. +# getPatientSetID("GSE8581", "Nonsense") #isn't supposed to work +# getPatientSetID("GSE8581", "/studies/gse8581/concepts/Subjects/Age%20%28year%29") #works on transmart-dev +# getPatientSetID("GSE8581", "/studies/gse8581/concepts/Subjects/Age%20%28year%29") #works on transmart-dev +# getPatientSetID("GSE8581", "age" < 65) #works on transmart-dev. +# getPatientSetID("GSE8581", "sex" == "unknown") #isn't supposed to work +# getPatientSetID("GSE8581", "lung" < 65) #isn't supposed to work +# getPatientSetID("GSE8581", "Subjects" < 65) #isn't supposed to work +# getPatientSetID("GSE8581", "age" < 65 | "sex" == "female") #works on transmart-dev. +# getPatientSetID("GSE8581", "age" < 65 & "sex" == "female") #works on transmart-dev. +# getPatientSetID("GSE8581", "age" < 65 & ("lung disease" == "control" | "lung disease" == "chronic obstructive pulmonary disease") & "Biomarker_Data") +# getPatientSetID("GSE8581","sex"== "female" | ("age" < 65 & "Biomarker_Data")) +# getPatientSetID("GSE8581","sex"== "female" & "age" < 65 | "Biomarker_Data") +# concepts <- c("age", "sex") +# getPatientSetID("GSE8581", concepts[1] < some_value) #works on transmart-dev. +# getPatientSetID("GSE8581", substitute(concepts[1] < some_value[1])) #works on transmart-dev. +# constraint <- "concepts[1] < 65" +# getPatientSetID("GSE8581",constraint[1]) #works on transmart-dev. +# constraint <- "age" +# getPatientSetID("GSE8581",constraint) #works on transmart-dev. +# constraints <- "age" +# getPatientSetID("GSE8581",constraints[2]) #shouldn't work +# getPatientSetID("GSE8581",constraints[2] < 65) #shouldn't work +# tmp <- c(substitute("age" <65), substitute("sex"== "female")) +# getPatientSetID("GSE8581",tmp[1]) #works on transmart-dev +# concepts <- c("age", "sex") +# constraint <- "concepts[1] < 65" +# getPatientSetID("gse8581", "concepts[1] < 65") + diff --git a/man/getPatientSetID.Rd b/man/getPatientSetID.Rd index dd81ad4..647b7de 100644 --- a/man/getPatientSetID.Rd +++ b/man/getPatientSetID.Rd @@ -126,13 +126,17 @@ haven't connected to the server yet, establish a connection using the \code{\lin ## create patient.set + #selecting all patients with a value for concept "age" + getPatientSetID("GSE8581", "age") + + #selecting all patients with "age" < 65 getPatientSetID("GSE8581", "age" < 65) #or: my_concepts <- c("Age", "Sex", "Lung Disease") constraint_value <- 65 - getPatientSetID("GSE8581", my_concepts[[1]] < constraint_value) + getPatientSetID("GSE8581", my_concepts[1] < constraint_value) #multiple constraints can be combined: getPatientSetID("GSE8581", "Age" < 65 & "Sex" == "female" & ("Lung Disease" == "chronic obstructive pulmonary disease" | From 645902bd664dda0c8b6813bcf75ab70dbb76064e Mon Sep 17 00:00:00 2001 From: Jan Kanis Date: Mon, 13 Jun 2016 10:18:49 +0200 Subject: [PATCH 09/26] edit comment --- R/getPatientSetID.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/getPatientSetID.R b/R/getPatientSetID.R index f4a3e62..4d78973 100644 --- a/R/getPatientSetID.R +++ b/R/getPatientSetID.R @@ -1,4 +1,4 @@ -# Copyright 2014, 2015, 2016 The Hyve B.V. +# Copyright 2014 - 2016 The Hyve B.V. # Copyright 2014 Janssen Research & Development, LLC. # # This file is part of tranSMART R Client: R package allowing access to From ff85b8226cb6fd2f67d4399d4e16db303ed79a3f Mon Sep 17 00:00:00 2001 From: Jan Kanis Date: Tue, 19 Jul 2016 16:01:17 +0200 Subject: [PATCH 10/26] Remove wrong copyright attributions --- R/getPatientSetID.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/getPatientSetID.R b/R/getPatientSetID.R index 878d1aa..66534a3 100644 --- a/R/getPatientSetID.R +++ b/R/getPatientSetID.R @@ -1,5 +1,4 @@ # Copyright 2014 - 2016 The Hyve B.V. -# Copyright 2014 Janssen Research & Development, LLC. # # This file is part of tranSMART R Client: R package allowing access to # tranSMART's data via its RESTful API. From 9a598e6d5b0c5980b06c6e94463d49ce57bf8ccd Mon Sep 17 00:00:00 2001 From: Jan Kanis Date: Wed, 20 Jul 2016 17:41:50 +0200 Subject: [PATCH 11/26] :fish: convert getPatientSetID to tabs --- R/getPatientSetID.R | 1250 +++++++++++++++++++++---------------------- 1 file changed, 625 insertions(+), 625 deletions(-) diff --git a/R/getPatientSetID.R b/R/getPatientSetID.R index 66534a3..58098c1 100644 --- a/R/getPatientSetID.R +++ b/R/getPatientSetID.R @@ -27,530 +27,530 @@ # (c1 | c2) & (c3|c4|c5) & c6 &... where c is either a constraint built up as {concept}{operator}{constraint_value} # (e.g. "age" < 60) or a reference to a concept (e.g. "age") getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = F){ - if(missing(study.name)){stop("Provide study name")} - if(missing(patientset.constraints)){stop("Provide patientset.constraints")} - message("\nProcessing input...", "") - - # retrieve the expression that defines the constraints - patientset.constraints <- substitute(patientset.constraints) #needs to be like this, with possible later evaluation in - # parsePatientsetConstraints because otherwise things such as "age"<65 & "biomarker data" - # will result in an error (problem is the string without operator) if you - # try e.g. is.call or is.character on the input - # if constraints are supplied as string, try to parse the string - patientset.constraints <- .checkPatientSetConstraints(patientset.constraints) - - - # retrieve concept information for the given study, and only keep relevant columns. - # this will be used later to match the concepts supplied by the user as part of the constraint definition to concept - # paths. - studyConcepts <- getConcepts(study.name) - studyConcepts <- studyConcepts[, c("name", "fullName", "type", "api.link.self.href")] - studyConcepts <- .findEndLeaves(studyConcepts) - - # read the constraints given by the user, and convert this to a XML query definition in the format as expected by REST-API - xmlQuery <- .buildXMLquery(patientset.constraints, studyConcepts, study.name) - hrConstraints <- .makeSummaryOfQuery(xmlQuery) - xmlQuery <- saveXML(xmlQuery, prefix = '\n') #convert XML tree to string - if(getOption("verbose")) { message(xmlQuery) } - - # do POST request, and store result - message("\nCreating patient set...", "") - serverResult <- .transmartGetJSON("/patient_sets", post.body = xmlQuery, - post.content.type ="text/xml;charset=UTF-8", onlyContent = c(201)) - - #return patient.set ID - patientsetID <- serverResult$id - - result <- list(patientsetID = patientsetID, patientsetSize = serverResult$setSize, - input_patientset.constraints = .expressionToText(patientset.constraints), - finalQueryConstraints = hrConstraints) - - message(paste("\nBased on the input, the following constraints were defined and sent to the server (always includes study concept):\n", - result$finalQueryConstraints, sep = ""), "") - if(returnXMLquery){result[["xmlQuery"]] <- xmlQuery} - return(result) + if(missing(study.name)){stop("Provide study name")} + if(missing(patientset.constraints)){stop("Provide patientset.constraints")} + message("\nProcessing input...", "") + + # retrieve the expression that defines the constraints + patientset.constraints <- substitute(patientset.constraints) #needs to be like this, with possible later evaluation in + # parsePatientsetConstraints because otherwise things such as "age"<65 & "biomarker data" + # will result in an error (problem is the string without operator) if you + # try e.g. is.call or is.character on the input + # if constraints are supplied as string, try to parse the string + patientset.constraints <- .checkPatientSetConstraints(patientset.constraints) + + + # retrieve concept information for the given study, and only keep relevant columns. + # this will be used later to match the concepts supplied by the user as part of the constraint definition to concept + # paths. + studyConcepts <- getConcepts(study.name) + studyConcepts <- studyConcepts[, c("name", "fullName", "type", "api.link.self.href")] + studyConcepts <- .findEndLeaves(studyConcepts) + + # read the constraints given by the user, and convert this to a XML query definition in the format as expected by REST-API + xmlQuery <- .buildXMLquery(patientset.constraints, studyConcepts, study.name) + hrConstraints <- .makeSummaryOfQuery(xmlQuery) + xmlQuery <- saveXML(xmlQuery, prefix = '\n') #convert XML tree to string + if(getOption("verbose")) { message(xmlQuery) } + + # do POST request, and store result + message("\nCreating patient set...", "") + serverResult <- .transmartGetJSON("/patient_sets", post.body = xmlQuery, + post.content.type ="text/xml;charset=UTF-8", onlyContent = c(201)) + + #return patient.set ID + patientsetID <- serverResult$id + + result <- list(patientsetID = patientsetID, patientsetSize = serverResult$setSize, + input_patientset.constraints = .expressionToText(patientset.constraints), + finalQueryConstraints = hrConstraints) + + message(paste("\nBased on the input, the following constraints were defined and sent to the server (always includes study concept):\n", + result$finalQueryConstraints, sep = ""), "") + if(returnXMLquery){result[["xmlQuery"]] <- xmlQuery} + return(result) } .checkPatientSetConstraints <- function(patientsetConstraints){ - #test if it is expression and not a string. If string: try to parse - if(is.character(patientsetConstraints)){ - if(length(patientsetConstraints) > 1){ - stop("Incorrect input for patient set constraints. Found multiple strings for defining the patient set constraints. - The patient set constraints should be supplied in one single expression (or string).")} - - try({patientsetConstraintsParsed <- parse(text = patientsetConstraints)[[1]] - if(length(patientsetConstraintsParsed) == 1){ - if(is.character(patientsetConstraintsParsed)){ #e.g. happens if input string is "\"age\"" - patientsetConstraints <- patientsetConstraintsParsed - } - } - if(length(patientsetConstraintsParsed) > 1){ - message(paste("\nDetecting a string as input for patient set constraints - expected is an expression,", - "such as: \"age\" > 65. \nWill attempt to parse the constraints out of the string, converting it", - "into an expression...")) - patientsetConstraints <- patientsetConstraintsParsed - } - }, silent = T - ) + #test if it is expression and not a string. If string: try to parse + if(is.character(patientsetConstraints)){ + if(length(patientsetConstraints) > 1){ + stop("Incorrect input for patient set constraints. Found multiple strings for defining the patient set constraints. + The patient set constraints should be supplied in one single expression (or string).")} + + try({patientsetConstraintsParsed <- parse(text = patientsetConstraints)[[1]] + if(length(patientsetConstraintsParsed) == 1){ + if(is.character(patientsetConstraintsParsed)){ #e.g. happens if input string is "\"age\"" + patientsetConstraints <- patientsetConstraintsParsed + } + } + if(length(patientsetConstraintsParsed) > 1){ + message(paste("\nDetecting a string as input for patient set constraints - expected is an expression,", + "such as: \"age\" > 65. \nWill attempt to parse the constraints out of the string, converting it", + "into an expression...")) + patientsetConstraints <- patientsetConstraintsParsed + } + }, silent = T + ) } - return(patientsetConstraints) + return(patientsetConstraints) } # parse the constraints, and turn it into a query in XML format .buildXMLquery <- function(patientset.constraints, studyConcepts, study.name){ - - ## parse the expression containing the constraints and translate this into a query definition in XML format - parsedConstraintsXMLlist <- .parsePatientSetConstraints(patientset.constraints, studyConcepts) - - # parsePatientSetConstraints returns a list with XML trees, these trees all either have items as top XMLnodes or - # panels. If the top nodes of the trees are items, add these items to a panel node and add this new node to a list. - if(xmlName(parsedConstraintsXMLlist[[1]]) == "item"){ - parsedConstraintsXMLlist <- .makePanelList(parsedConstraintsXMLlist) - } - - #add one panel with study.name, ensuring that only patients from the specified study are selected - parsedConstraintsXMLlist <- .addStudyPanel(parsedConstraintsXMLlist, study.name, studyConcepts[1, "fullName"]) - - # build XML formatted query - xmlQuery <- xmlNode("qd:query_definition", namespaceDefinitions = c(qd="http://www.i2b2.org/xsd/cell/crc/psm/1.1/")) - for(i in 1:length(parsedConstraintsXMLlist)){ - xmlQuery <- append.XMLNode(xmlQuery, parsedConstraintsXMLlist[[i]]) - } - return(xmlQuery) + + ## parse the expression containing the constraints and translate this into a query definition in XML format + parsedConstraintsXMLlist <- .parsePatientSetConstraints(patientset.constraints, studyConcepts) + + # parsePatientSetConstraints returns a list with XML trees, these trees all either have items as top XMLnodes or + # panels. If the top nodes of the trees are items, add these items to a panel node and add this new node to a list. + if(xmlName(parsedConstraintsXMLlist[[1]]) == "item"){ + parsedConstraintsXMLlist <- .makePanelList(parsedConstraintsXMLlist) + } + + #add one panel with study.name, ensuring that only patients from the specified study are selected + parsedConstraintsXMLlist <- .addStudyPanel(parsedConstraintsXMLlist, study.name, studyConcepts[1, "fullName"]) + + # build XML formatted query + xmlQuery <- xmlNode("qd:query_definition", namespaceDefinitions = c(qd="http://www.i2b2.org/xsd/cell/crc/psm/1.1/")) + for(i in 1:length(parsedConstraintsXMLlist)){ + xmlQuery <- append.XMLNode(xmlQuery, parsedConstraintsXMLlist[[i]]) + } + return(xmlQuery) } # determine for each concept in the concept table whether a concept is an end leaf of the tree, ie. if it is a data node # (which can be either a numeric, categorical or highdim node) .findEndLeaves <- function(conceptListStudy){ - conceptTypes <- unique(conceptListStudy$type) - - if( any(! conceptTypes %in% c("CATEGORICAL_OPTION", "NUMERIC", "UNKNOWN", "HIGH_DIMENSIONAL"))){ - warning("Unexpected concept type for one or more concepts in the selected study. - Determination which concepts are end-leaves of the tree might not work correcty in all cases. - This only affects the patient selection query if concepts with undetermined type are included in the query. - In that case this message is followed by an accompanying error. - You can help fix it by contacting us. Type ?transmartRClient for contact details. - \n") - } - - # concepts with type numeric and high_dimensional are end-leaves, - # concepts with type categorical_options are not end-leaves - endLeaf <- "" - conceptListStudy <- cbind(conceptListStudy, endLeaf, stringsAsFactors = F) - conceptListStudy$endLeaf[conceptListStudy$type %in% c("NUMERIC", "HIGH_DIMENSIONAL")] <- "YES" - conceptListStudy$endLeaf[conceptListStudy$type == "CATEGORICAL_OPTION"] <- "NO" - - #find categorical data nodes, and set type of categorical end-leave (data node) to "CATEGORICAL_NODE" - # concepts with 'type' categorical_option are the concept values. Take the concept path of the concept values and - # remove the last part to retrieve a list of concept paths for categorical nodes. - categoricalOptionsPaths <- conceptListStudy$fullName[conceptListStudy$type == "CATEGORICAL_OPTION"] - categoricalNodes <- sub("\\\\[^\\]*\\\\$", "\\\\",categoricalOptionsPaths) # remove last part of concept path - # containing the categorical value, to obtain path to categorical node - categoricalNodes <- unique(categoricalNodes) - - conceptListStudy$endLeaf[conceptListStudy$type == "UNKNOWN" & conceptListStudy$fullName %in% categoricalNodes] <- "YES" - conceptListStudy$type[conceptListStudy$type == "UNKNOWN" & conceptListStudy$fullName %in% categoricalNodes] <- "CATEGORICAL_NODE" - conceptListStudy$endLeaf[conceptListStudy$type == "UNKNOWN" & !conceptListStudy$fullName %in% categoricalNodes] <- "NO" - - return(conceptListStudy) + conceptTypes <- unique(conceptListStudy$type) + + if( any(! conceptTypes %in% c("CATEGORICAL_OPTION", "NUMERIC", "UNKNOWN", "HIGH_DIMENSIONAL"))){ + warning("Unexpected concept type for one or more concepts in the selected study. + Determination which concepts are end-leaves of the tree might not work correcty in all cases. + This only affects the patient selection query if concepts with undetermined type are included in the query. + In that case this message is followed by an accompanying error. + You can help fix it by contacting us. Type ?transmartRClient for contact details. + \n") + } + + # concepts with type numeric and high_dimensional are end-leaves, + # concepts with type categorical_options are not end-leaves + endLeaf <- "" + conceptListStudy <- cbind(conceptListStudy, endLeaf, stringsAsFactors = F) + conceptListStudy$endLeaf[conceptListStudy$type %in% c("NUMERIC", "HIGH_DIMENSIONAL")] <- "YES" + conceptListStudy$endLeaf[conceptListStudy$type == "CATEGORICAL_OPTION"] <- "NO" + + #find categorical data nodes, and set type of categorical end-leave (data node) to "CATEGORICAL_NODE" + # concepts with 'type' categorical_option are the concept values. Take the concept path of the concept values and + # remove the last part to retrieve a list of concept paths for categorical nodes. + categoricalOptionsPaths <- conceptListStudy$fullName[conceptListStudy$type == "CATEGORICAL_OPTION"] + categoricalNodes <- sub("\\\\[^\\]*\\\\$", "\\\\",categoricalOptionsPaths) # remove last part of concept path + # containing the categorical value, to obtain path to categorical node + categoricalNodes <- unique(categoricalNodes) + + conceptListStudy$endLeaf[conceptListStudy$type == "UNKNOWN" & conceptListStudy$fullName %in% categoricalNodes] <- "YES" + conceptListStudy$type[conceptListStudy$type == "UNKNOWN" & conceptListStudy$fullName %in% categoricalNodes] <- "CATEGORICAL_NODE" + conceptListStudy$endLeaf[conceptListStudy$type == "UNKNOWN" & !conceptListStudy$fullName %in% categoricalNodes] <- "NO" + + return(conceptListStudy) } # parsePatientSetConstraints takes an expression defining the constraints for the patientset and returns # either a list of item XMLtrees or list of panel XMLtrees .parsePatientSetConstraints <- function(patientsetConstraints, studyConcepts){ - - relationalOperators <- c("<", ">", "<=",">=", "==", "!=") - logicalOperators <- c("&","&&", "|", "||") - allowedOperators <- c(relationalOperators, logicalOperators) - - verbose <- getOption("verbose") - # construct a message that's used later on, when an error occurs. This message includes a listing of the different - # elements (sub units) of the constraint expression, if verbose == T - elementsMsg <- "" - if(verbose){ - subUnits <- "" - for(i in 1:length(patientsetConstraints)){ - subUnits <- paste(subUnits, paste("\n\tElement ", i,": ", .expressionToText(patientsetConstraints[[i]]), sep = "")) + + relationalOperators <- c("<", ">", "<=",">=", "==", "!=") + logicalOperators <- c("&","&&", "|", "||") + allowedOperators <- c(relationalOperators, logicalOperators) + + verbose <- getOption("verbose") + # construct a message that's used later on, when an error occurs. This message includes a listing of the different + # elements (sub units) of the constraint expression, if verbose == T + elementsMsg <- "" + if(verbose){ + subUnits <- "" + for(i in 1:length(patientsetConstraints)){ + subUnits <- paste(subUnits, paste("\n\tElement ", i,": ", .expressionToText(patientsetConstraints[[i]]), sep = "")) + } + elementsMsg <- paste("\nElements of the (sub)constraint after parsing", subUnits,sep = "") } - elementsMsg <- paste("\nElements of the (sub)constraint after parsing", subUnits,sep = "") - } - - errorMsg <- paste("Incorrect (sub)constraint definition, or failure to parse the (sub)constraint definition correctly.", - "Check the format of the constraint. \nFor more details about how to specify patient set constraints,", - "see the help/manual page of this function. \n(Sub)constraint: ", - .expressionToText(patientsetConstraints), elementsMsg) - - # if length(patientsetConstraints) == 3, then the expression contains three elements, so it is either a low-level - # constraint of the form {concept}{constraint_operator}{constraint_value} or it is a concatenation of constraints - # separated by either an AND or OR operator (of form {some constraint(s)}{ &, &&, | or || }{some constraint(s)} ) - # alternatively it is an expression containing the call to substitute() or an object with index, e.g. variable[1], data.frame$firstColumn[firstRow],etc - # contain a string specifying a concept, or a string that in itself is a constraint definition, or an expression as - # created with subsitute for specifying a constraint . - - if(is.symbol(patientsetConstraints)){ - firstElement_in_allowedOperators <- F - }else{ - firstElement <- as.character(patientsetConstraints[[1]]) - firstElement_in_allowedOperators <- firstElement%in% allowedOperators - } - - if(length(patientsetConstraints) == 3 & firstElement_in_allowedOperators){ - constraintOperator <- firstElement - constraint <- list() + errorMsg <- paste("Incorrect (sub)constraint definition, or failure to parse the (sub)constraint definition correctly.", + "Check the format of the constraint. \nFor more details about how to specify patient set constraints,", + "see the help/manual page of this function. \n(Sub)constraint: ", + .expressionToText(patientsetConstraints), elementsMsg) + + # if length(patientsetConstraints) == 3, then the expression contains three elements, so it is either a low-level + # constraint of the form {concept}{constraint_operator}{constraint_value} or it is a concatenation of constraints + # separated by either an AND or OR operator (of form {some constraint(s)}{ &, &&, | or || }{some constraint(s)} ) + # alternatively it is an expression containing the call to substitute() or an object with index, e.g. variable[1], data.frame$firstColumn[firstRow],etc + # contain a string specifying a concept, or a string that in itself is a constraint definition, or an expression as + # created with subsitute for specifying a constraint . - # in case where the (sub)constraint is a concatation of subconstraints, combined by an AND or OR operator - # (e.g. "age" > 12 & "sex" = "Female"): element [[1]] contains the AND or OR operator, element [[2]] the - # subconstraint to the left of the operator, element [[3]] is the subconstraint to the right of the operator - # in case that the (sub)constraint is not a concatenation of subconstraints, but holds a single criterium that a - # concept has to satisfy to: - # [[1]] contains a relational operator, [[2]] the concept, [[3]] the constraint value - is.singleConstraint <- constraintOperator %in% relationalOperators - if(is.singleConstraint){ - itemXMLlist <- list(.parseSingleConstraint(patientsetConstraints, studyConcepts)) - return(itemXMLlist) + if(is.symbol(patientsetConstraints)){ + firstElement_in_allowedOperators <- F }else{ - # it's a concatenation of constraints: call function again on the subconstraints. - # right now it only supports the format where the & operators are always the highest level operators and the | - # operators are only used as lowest level, forcing the format: (c1|c2)&c3&(c4|c5|c6|...)& ... - - treeBeforeOperator <- .parsePatientSetConstraints(patientsetConstraints[[2]], studyConcepts) - treeAfterOperator <- .parsePatientSetConstraints(patientsetConstraints[[3]], studyConcepts) - - #if there is an "OR" operation inbetween two subconstraints, the combination of those two subconstraints cannot - # have an & anymore (this is for forcing the strict format for constraint definition described above) - if(constraintOperator == "|" ) { - if(grepl("&", .expressionToText(patientsetConstraints))){ - stop(paste("Wrong format of (sub)constraint definition. Found in (sub)constraint: ", - .expressionToText(patientsetConstraints), - "\nRight now the only format supported for defining patientset constraints is one where the & ", - "operator is on the highest level of the constraint definition \nand the | (or) operator on the ", - "lowest (second) level, \nie. the format is 'x1' or (in case of multiple \'&\' operations) ", - "'x1 & x2 & ...', \nwhere x1, x2, etc. can contain one or more subconstraints (called c here) ", - "separated by an | (or) operator, ie. x = c1 or x = (c1 | c2 | ...),", - "\n where c is a single constraint such as \'\"age\" < 60\' or a reference to a concept.", - "\n Examples of valid constraints: c1, c1|c2, c1&c2&c3, (c1|c2)&c3&(c4|c5|c6)", sep = "" - )) + firstElement <- as.character(patientsetConstraints[[1]]) + firstElement_in_allowedOperators <- firstElement%in% allowedOperators + } + + if(length(patientsetConstraints) == 3 & firstElement_in_allowedOperators){ + constraintOperator <- firstElement + + constraint <- list() + + # in case where the (sub)constraint is a concatation of subconstraints, combined by an AND or OR operator + # (e.g. "age" > 12 & "sex" = "Female"): element [[1]] contains the AND or OR operator, element [[2]] the + # subconstraint to the left of the operator, element [[3]] is the subconstraint to the right of the operator + # in case that the (sub)constraint is not a concatenation of subconstraints, but holds a single criterium that a + # concept has to satisfy to: + # [[1]] contains a relational operator, [[2]] the concept, [[3]] the constraint value + is.singleConstraint <- constraintOperator %in% relationalOperators + if(is.singleConstraint){ + itemXMLlist <- list(.parseSingleConstraint(patientsetConstraints, studyConcepts)) + return(itemXMLlist) + }else{ + # it's a concatenation of constraints: call function again on the subconstraints. + # right now it only supports the format where the & operators are always the highest level operators and the | + # operators are only used as lowest level, forcing the format: (c1|c2)&c3&(c4|c5|c6|...)& ... + + treeBeforeOperator <- .parsePatientSetConstraints(patientsetConstraints[[2]], studyConcepts) + treeAfterOperator <- .parsePatientSetConstraints(patientsetConstraints[[3]], studyConcepts) + + #if there is an "OR" operation inbetween two subconstraints, the combination of those two subconstraints cannot + # have an & anymore (this is for forcing the strict format for constraint definition described above) + if(constraintOperator == "|" ) { + if(grepl("&", .expressionToText(patientsetConstraints))){ + stop(paste("Wrong format of (sub)constraint definition. Found in (sub)constraint: ", + .expressionToText(patientsetConstraints), + "\nRight now the only format supported for defining patientset constraints is one where the & ", + "operator is on the highest level of the constraint definition \nand the | (or) operator on the ", + "lowest (second) level, \nie. the format is 'x1' or (in case of multiple \'&\' operations) ", + "'x1 & x2 & ...', \nwhere x1, x2, etc. can contain one or more subconstraints (called c here) ", + "separated by an | (or) operator, ie. x = c1 or x = (c1 | c2 | ...),", + "\n where c is a single constraint such as \'\"age\" < 60\' or a reference to a concept.", + "\n Examples of valid constraints: c1, c1|c2, c1&c2&c3, (c1|c2)&c3&(c4|c5|c6)", sep = "" + )) + } + itemXMLlist <- c(treeBeforeOperator,treeAfterOperator) + return(itemXMLlist) + } + + # treeBeforeOperator/treeAfterOperator can be either a list of items or a list of panels + # if it contains a list of items: add the items of that list to a panel node + if(constraintOperator == "&"){ + if(xmlName(treeBeforeOperator[[1]]) == "item"){ + beforePanels <- .makePanelList(treeBeforeOperator) + } + if(xmlName(treeBeforeOperator[[1]]) == "panel"){ + beforePanels <- treeBeforeOperator + } + if(xmlName(treeAfterOperator[[1]]) == "item"){ + afterPanels <- .makePanelList(treeAfterOperator) + } + if(xmlName(treeAfterOperator[[1]]) == "panel"){ + afterPanels <- treeAfterOperator + } + + panelList <- c(beforePanels, afterPanels) + return(panelList) + } } - itemXMLlist <- c(treeBeforeOperator,treeAfterOperator) + }else if(class(patientsetConstraints) == "("){ + # expression is surrounded by brackets: take expression between brackets and call function again + # element [[2]] contains the expression between the brackets, element [[1]] is '(' + xmlTreeList <- .parsePatientSetConstraints(patientsetConstraints[[2]], studyConcepts) + return(xmlTreeList) + }else if(length(patientsetConstraints) == 1 & is.character(patientsetConstraints)) { + # Then the (sub)constraint should consist of only a specification of a concept. + # This will result in selection of all patients that have a value for this concept. + # Concept specification can be a string containing a pattern to match to the concept name or a concept path or link, + #retrieve concept path + conceptPath <- .getConstraintConcept(patientsetConstraints, patientsetConstraints, studyConcepts, + identical.match = F, testIfEndLeave = F)[["conceptPath"]] + + # make itemTree for that concept + itemXMLlist <- list(xmlNode("item", xmlNode("item_key", .makeItemKey(conceptPath)))) return(itemXMLlist) - } - - # treeBeforeOperator/treeAfterOperator can be either a list of items or a list of panels - # if it contains a list of items: add the items of that list to a panel node - if(constraintOperator == "&"){ - if(xmlName(treeBeforeOperator[[1]]) == "item"){ - beforePanels <- .makePanelList(treeBeforeOperator) - } - if(xmlName(treeBeforeOperator[[1]]) == "panel"){ - beforePanels <- treeBeforeOperator + }else if(class(patientsetConstraints) == "name" | class(patientsetConstraints) == "call"){ + # alternatively it is an expression containing a call to substite or an object (with or without index), + # e.g. variables[1] or variable, data.frame$firstColumn[firstRow],etc - this object can + # contain a string specifying a concept, or a string that in itself is a constraint definition, or an expression as + # created with subsitute for specifying a constraint . + # e.g. when following input is given: concepts<- c("age", "sex"); getPatientSetID("Some study", concepts[1]) or + # tmp <- c(substitute("age" <65), substitute("sex"== "female")); getPatientSetID("Some study", tmp[[1]]) + # or an object with strings specifying the constraints, and then the strings shoudl be turned into expressions too. + # tmp <- c("\"age\" <65", "\"sex\"== \"female\""); getPatientSetID("Some study", tmp[1]) + # try to evaluate the expression and find a matching constraint concept + result <- try(eval(patientsetConstraints, envir = globalenv()), silent = T) + if(class(result) == "try-error"){ + + stop(paste(attr(result, "condition")$message, "\n",errorMsg, sep = "")) } - if(xmlName(treeAfterOperator[[1]]) == "item"){ - afterPanels <- .makePanelList(treeAfterOperator) + if(is.list(result)) + { + if(length(result) > 1){ + stop(paste("Incorrect input for patient set constraints.\n", + "Evaluation of input", .expressionToText(patientsetConstraints), + "results in a list with more than one element", + "while the function expects only a single string or a single expression", + "(as created with function substitute), not multiple.")) + } + if(length(result) == 1){ + warning(paste("Evaluation of input", .expressionToText(patientsetConstraints), + "results in a list with a single element.", + "Expected is a string or an expression (as created with function substitute).", + "Will try to use this single element in the list")) + result <- result[[1]] + } } - if(xmlName(treeAfterOperator[[1]]) == "panel"){ - afterPanels <- treeAfterOperator + if(length(result)==1) + { + if(is.na(result)) + { + stop(paste("Content of \'",.expressionToText(patientsetConstraints), "\' is 'NA'.", + " Cannot use 'NA' as constraint definition/concept specification.", sep = "")) + } } + result <- .checkPatientSetConstraints(result) #parses constraint definition out of string, if applicable + patientsetConstraints <- result - panelList <- c(beforePanels, afterPanels) - return(panelList) - } - } - }else if(class(patientsetConstraints) == "("){ - # expression is surrounded by brackets: take expression between brackets and call function again - # element [[2]] contains the expression between the brackets, element [[1]] is '(' - xmlTreeList <- .parsePatientSetConstraints(patientsetConstraints[[2]], studyConcepts) - return(xmlTreeList) - }else if(length(patientsetConstraints) == 1 & is.character(patientsetConstraints)) { - # Then the (sub)constraint should consist of only a specification of a concept. - # This will result in selection of all patients that have a value for this concept. - # Concept specification can be a string containing a pattern to match to the concept name or a concept path or link, - #retrieve concept path - conceptPath <- .getConstraintConcept(patientsetConstraints, patientsetConstraints, studyConcepts, - identical.match = F, testIfEndLeave = F)[["conceptPath"]] - - # make itemTree for that concept - itemXMLlist <- list(xmlNode("item", xmlNode("item_key", .makeItemKey(conceptPath)))) - return(itemXMLlist) - }else if(class(patientsetConstraints) == "name" | class(patientsetConstraints) == "call"){ - # alternatively it is an expression containing a call to substite or an object (with or without index), - # e.g. variables[1] or variable, data.frame$firstColumn[firstRow],etc - this object can - # contain a string specifying a concept, or a string that in itself is a constraint definition, or an expression as - # created with subsitute for specifying a constraint . - # e.g. when following input is given: concepts<- c("age", "sex"); getPatientSetID("Some study", concepts[1]) or - # tmp <- c(substitute("age" <65), substitute("sex"== "female")); getPatientSetID("Some study", tmp[[1]]) - # or an object with strings specifying the constraints, and then the strings shoudl be turned into expressions too. - # tmp <- c("\"age\" <65", "\"sex\"== \"female\""); getPatientSetID("Some study", tmp[1]) - # try to evaluate the expression and find a matching constraint concept - result <- try(eval(patientsetConstraints, envir = globalenv()), silent = T) - if(class(result) == "try-error"){ - - stop(paste(attr(result, "condition")$message, "\n",errorMsg, sep = "")) - } - if(is.list(result)) - { - if(length(result) > 1){ - stop(paste("Incorrect input for patient set constraints.\n", - "Evaluation of input", .expressionToText(patientsetConstraints), - "results in a list with more than one element", - "while the function expects only a single string or a single expression", - "(as created with function substitute), not multiple.")) - } - if(length(result) == 1){ - warning(paste("Evaluation of input", .expressionToText(patientsetConstraints), - "results in a list with a single element.", - "Expected is a string or an expression (as created with function substitute).", - "Will try to use this single element in the list")) - result <- result[[1]] - } - } - if(length(result)==1) - { - if(is.na(result)) - { - stop(paste("Content of \'",.expressionToText(patientsetConstraints), "\' is 'NA'.", - " Cannot use 'NA' as constraint definition/concept specification.", sep = "")) - } + xmlTreeList <- .parsePatientSetConstraints(patientsetConstraints, studyConcepts) + return(xmlTreeList) + }else{ + stop(errorMsg) } - result <- .checkPatientSetConstraints(result) #parses constraint definition out of string, if applicable - patientsetConstraints <- result - - xmlTreeList <- .parsePatientSetConstraints(patientsetConstraints, studyConcepts) - return(xmlTreeList) - }else{ - stop(errorMsg) - } } # the deparse function converts expressions to strings. However it cuts the strings of at a certain bytelength, # so a long expression could result in a character vector with several portions of the original expression # this function makes one string out of the vector again .expressionToText <- function(expression){ - textExpression <- deparse(expression, width.cutoff = 500) - - if(length(textExpression)>1){ - textExpressionPasted <- gsub("^[[:blank:]]+", "", textExpression) - textExpressionPasted <- paste(textExpressionPasted, collapse = "") - - #warnings are truncated, so it doesn't necessarily print all in case of long textExpression - message(paste("\nWhile trying to convert an expression to text, the deparse function cut an expression in two.", - "\nSeparate parts:\n", paste("\t", textExpression, collapse = "\n AND \n"), - "\nThese are pasted again together. Result:\n ", textExpressionPasted)) - textExpression <- textExpressionPasted - } - return(textExpression) + textExpression <- deparse(expression, width.cutoff = 500) + + if(length(textExpression)>1){ + textExpressionPasted <- gsub("^[[:blank:]]+", "", textExpression) + textExpressionPasted <- paste(textExpressionPasted, collapse = "") + + #warnings are truncated, so it doesn't necessarily print all in case of long textExpression + message(paste("\nWhile trying to convert an expression to text, the deparse function cut an expression in two.", + "\nSeparate parts:\n", paste("\t", textExpression, collapse = "\n AND \n"), + "\nThese are pasted again together. Result:\n ", textExpressionPasted)) + textExpression <- textExpressionPasted + } + return(textExpression) } .makeSummaryOfQuery <- function(xmlQuery){ - - parsedXML <- "" - - if(all(names(xmlQuery)== "panel")){ - panels <- xmlChildren(xmlQuery) - - for(i in 1:length(panels)){ - panel <- panels[[i]] - - if(i == 1){parsedXML <- paste(parsedXML, "( ", sep = "") #first panel - }else{parsedXML <- paste(parsedXML, "\n\n\t&\n\n( ", sep = "")} - - invert <- xmlValue(panel[["invert"]]) - if(invert == "1"){parsedXML <- paste(parsedXML, "!( ", sep = "") } - - #add the children - items <- xmlElementsByTagName(panel, "item") - for(j in 1:length(items)){ - item <- items[[j]] - if(j > 1){parsedXML <- paste(parsedXML, " | ", sep = "")} - - #get concept path - item_key <- xmlValue(item[["item_key"]]) - concept_path <- gsub("\\\\\\\\Public Studies", "", item_key) - concept_path <- gsub("\\\\\\\\Private Studies", "", concept_path) - parsedXML <- paste(parsedXML, "\"", concept_path, "\"", sep = "") + + parsedXML <- "" + + if(all(names(xmlQuery)== "panel")){ + panels <- xmlChildren(xmlQuery) - #if constraint operator and constraint value are given, get these - childNames <- names(item) - if("constrain_by_value" %in% childNames){ - valueConstraints <- item[["constrain_by_value"]] - valueOperator <- xmlValue(valueConstraints[["value_operator"]]) - parsedXML <- paste(parsedXML, " ", valueOperator, " ", sep = "") - valueConstraint <- xmlValue(valueConstraints[["value_constraint"]]) - parsedXML <- paste(parsedXML, " ", valueConstraint, " ", sep = "") - + for(i in 1:length(panels)){ + panel <- panels[[i]] + + if(i == 1){parsedXML <- paste(parsedXML, "( ", sep = "") #first panel + }else{parsedXML <- paste(parsedXML, "\n\n\t&\n\n( ", sep = "")} + + invert <- xmlValue(panel[["invert"]]) + if(invert == "1"){parsedXML <- paste(parsedXML, "!( ", sep = "") } + + #add the children + items <- xmlElementsByTagName(panel, "item") + for(j in 1:length(items)){ + item <- items[[j]] + if(j > 1){parsedXML <- paste(parsedXML, " | ", sep = "")} + + #get concept path + item_key <- xmlValue(item[["item_key"]]) + concept_path <- gsub("\\\\\\\\Public Studies", "", item_key) + concept_path <- gsub("\\\\\\\\Private Studies", "", concept_path) + parsedXML <- paste(parsedXML, "\"", concept_path, "\"", sep = "") + + #if constraint operator and constraint value are given, get these + childNames <- names(item) + if("constrain_by_value" %in% childNames){ + valueConstraints <- item[["constrain_by_value"]] + valueOperator <- xmlValue(valueConstraints[["value_operator"]]) + parsedXML <- paste(parsedXML, " ", valueOperator, " ", sep = "") + valueConstraint <- xmlValue(valueConstraints[["value_constraint"]]) + parsedXML <- paste(parsedXML, " ", valueConstraint, " ", sep = "") + + } + } + + #close brackets for panel + if(invert == "1"){parsedXML <- paste(parsedXML, " ))", sep = "") + }else{parsedXML <- paste(parsedXML, " )", sep = "") } } - } - - #close brackets for panel - if(invert == "1"){parsedXML <- paste(parsedXML, " ))", sep = "") - }else{parsedXML <- paste(parsedXML, " )", sep = "") } } - } - if(parsedXML == ""){warning("Something went wrong with making a human readable version of the XML. - This does not affect the formation of the patient set")} - return(parsedXML) + if(parsedXML == ""){warning("Something went wrong with making a human readable version of the XML. + This does not affect the formation of the patient set")} + return(parsedXML) } #just needs one conceptPath, can be of any of the concepts in the study. It can be any path in column 'fullName' .addStudyPanel <- function (constraintXMLlist, study.name, conceptPath){ - # retrieve the path for the study concept, by taking only the first part of the supplied concept path up to and - # including the study.name. - # e.g. take "\\Public Studies\\GSE8581\\" from "\\Public Studies\\GSE8581\\Subjects\\Ethnicity\\Afro American\\" - splitPath <- strsplit(conceptPath, "\\\\")[[1]] - nameHit <- grep(study.name, splitPath, ignore.case = T)[1] # take the first, just in case the study.name is repeated - # in later part of path - studyPath <- paste(c(splitPath[1:nameHit], ""), collapse = "\\", sep = "") - itemKey <- .makeItemKey(studyPath) - - panel <- xmlNode("panel", - xmlNode("invert", 0), - xmlNode("item", - xmlNode("item_key", itemKey))) - constraintXMLlist <- c(constraintXMLlist, list(panel)) - return(constraintXMLlist) + # retrieve the path for the study concept, by taking only the first part of the supplied concept path up to and + # including the study.name. + # e.g. take "\\Public Studies\\GSE8581\\" from "\\Public Studies\\GSE8581\\Subjects\\Ethnicity\\Afro American\\" + splitPath <- strsplit(conceptPath, "\\\\")[[1]] + nameHit <- grep(study.name, splitPath, ignore.case = T)[1] # take the first, just in case the study.name is repeated + # in later part of path + studyPath <- paste(c(splitPath[1:nameHit], ""), collapse = "\\", sep = "") + itemKey <- .makeItemKey(studyPath) + + panel <- xmlNode("panel", + xmlNode("invert", 0), + xmlNode("item", + xmlNode("item_key", itemKey))) + constraintXMLlist <- c(constraintXMLlist, list(panel)) + return(constraintXMLlist) } # it expects a list of "item" XML trees. It will add all items to a panel XML node, # and returns that node as part of a list .makePanelList <- function(itemXMLtreeList){ - panel <- xmlNode("panel", xmlNode("invert", 0)) - for(i in 1:length(itemXMLtreeList)){ - panel<- append.XMLNode(panel, itemXMLtreeList[[i]]) - } - panel <- list(panel) - return(panel) + panel <- xmlNode("panel", xmlNode("invert", 0)) + for(i in 1:length(itemXMLtreeList)){ + panel<- append.XMLNode(panel, itemXMLtreeList[[i]]) + } + panel <- list(panel) + return(panel) } # constraint is of format: {concept definition}{relational operator}{constraint_value}, e.g. "age" < 12. .parseSingleConstraint <- function(patientsetConstraints, studyConcepts){ - constraint <- list() - - # grab the different elements of the constraint definition - constraint$operator <- as.character(patientsetConstraints[[1]]) - constraint$concept <- patientsetConstraints[[2]] - constraint$value <- patientsetConstraints[[3]] - - if(class(constraint$value) == "name" | class(constraint$value) == "call"){ - tmpValue <- try(eval(constraint$value, envir = globalenv()), silent = T) - if(class(tmpValue) == "try-error"){ - try_error <- attr(tmpValue, "condition")$message - err_message <- paste(try_error, ". Object was specified in (sub)constraint ", - .expressionToText(patientsetConstraints) , ".\n", sep = "") - stop(err_message) - } - if(length(tmpValue) >1){ - message("\nInput for constraint_value: ") - print(tmpValue) - stop(paste("Incorrect input for constraint_value in (sub)constraint: ", .expressionToText(patientsetConstraints), - ".\nObject length of \'", constraint$value , "\' is larger than 1.", - "Only a single input value (string/number) is allowed as a constraint_value.")) - } - if(is.na(tmpValue)){ - stop(paste("Content of \'",.expressionToText(constraint$value), "\' is 'NA'.", - " A constraint value cannot be a missing value.", sep = "")) - } - constraint$value <- tmpValue - } - - # a concept can be defined by a pattern matching the concept name (1), by concept.link(2), concept.path(3) or - # by giving a variable/object containing a string with one of those three - # find the concept path that corresponds to the concept, and determine the type of - # node (numerical, categorical or high dim) - constraint <- c(constraint, .getConstraintConcept(constraint$concept, patientsetConstraints, studyConcepts, - identical.match = F)) - constraint$value_operator <- NA - constraint$value_type <- NA - - if(constraint$conceptType == "NUMERIC"){ - #check if the supplied constraint value is numeric - if(!is.numeric(constraint$value)){ - stop(paste("The supplied constraint value ", deparse(constraint$value)," is not numerical, while concept ", - constraint$conceptPath, " is a numerical concept. (This was the concept selected based on the input: \'", - constraint$concept, "\'). \nEncountered in (sub)constraint: ",.expressionToText(patientsetConstraints), - sep = "" )) + constraint <- list() + + # grab the different elements of the constraint definition + constraint$operator <- as.character(patientsetConstraints[[1]]) + constraint$concept <- patientsetConstraints[[2]] + constraint$value <- patientsetConstraints[[3]] + + if(class(constraint$value) == "name" | class(constraint$value) == "call"){ + tmpValue <- try(eval(constraint$value, envir = globalenv()), silent = T) + if(class(tmpValue) == "try-error"){ + try_error <- attr(tmpValue, "condition")$message + err_message <- paste(try_error, ". Object was specified in (sub)constraint ", + .expressionToText(patientsetConstraints) , ".\n", sep = "") + stop(err_message) + } + if(length(tmpValue) >1){ + message("\nInput for constraint_value: ") + print(tmpValue) + stop(paste("Incorrect input for constraint_value in (sub)constraint: ", .expressionToText(patientsetConstraints), + ".\nObject length of \'", constraint$value , "\' is larger than 1.", + "Only a single input value (string/number) is allowed as a constraint_value.")) + } + if(is.na(tmpValue)){ + stop(paste("Content of \'",.expressionToText(constraint$value), "\' is 'NA'.", + " A constraint value cannot be a missing value.", sep = "")) + } + constraint$value <- tmpValue } - # Each individual constraint is represented as an "item" in the XML tree that holds the query definition for the - # patient.set - # construct the "item" subtree for the current constraint - constraint$item_key <- .makeItemKey(constraint$conceptPath) - constraint$value_type <- "NUMBER" + # a concept can be defined by a pattern matching the concept name (1), by concept.link(2), concept.path(3) or + # by giving a variable/object containing a string with one of those three + # find the concept path that corresponds to the concept, and determine the type of + # node (numerical, categorical or high dim) + constraint <- c(constraint, .getConstraintConcept(constraint$concept, patientsetConstraints, studyConcepts, + identical.match = F)) + constraint$value_operator <- NA + constraint$value_type <- NA + + if(constraint$conceptType == "NUMERIC"){ + #check if the supplied constraint value is numeric + if(!is.numeric(constraint$value)){ + stop(paste("The supplied constraint value ", deparse(constraint$value)," is not numerical, while concept ", + constraint$conceptPath, " is a numerical concept. (This was the concept selected based on the input: \'", + constraint$concept, "\'). \nEncountered in (sub)constraint: ",.expressionToText(patientsetConstraints), + sep = "" )) + } + + # Each individual constraint is represented as an "item" in the XML tree that holds the query definition for the + # patient.set + # construct the "item" subtree for the current constraint + constraint$item_key <- .makeItemKey(constraint$conceptPath) + constraint$value_type <- "NUMBER" #translate relational operator from R to a value operator that can be recognized in the query - constraint$value_operator <- .getValueOperator(constraint$operator, "NUMERIC") - constrain_by_value_tree <- xmlNode("constrain_by_value", - xmlNode("value_operator", constraint$value_operator), - xmlNode("value_constraint", constraint$value), - xmlNode("value_type", constraint$value_type)) - itemXMLtree <- xmlNode("item", - xmlNode("item_key", constraint$item_key), - constrain_by_value_tree) - } - - if(constraint$conceptType == "CATEGORICAL_NODE" ){ - - #check if supplied constraint value is character - if(!is.character(constraint$value)){ - warning(paste("The supplied constraint value ", constraint$value," is not of class \'character\', while concept ", - constraint$conceptPath, " is a categorical concept (ie. containing text).", - "\n(This was the concept selected based on the input: \'", - constraint$concept, "\')", "\nWill convert the value to character, but unless there is actually a ", - "categorical value that matches the constraint value, this will result in an error later on.", - "\nEncountered in (sub)constraint: ",.expressionToText(patientsetConstraints), - sep = "" )) - constraint$value <- as.character(constraint$value) + constraint$value_operator <- .getValueOperator(constraint$operator, "NUMERIC") + constrain_by_value_tree <- xmlNode("constrain_by_value", + xmlNode("value_operator", constraint$value_operator), + xmlNode("value_constraint", constraint$value), + xmlNode("value_type", constraint$value_type)) + itemXMLtree <- xmlNode("item", + xmlNode("item_key", constraint$item_key), + constrain_by_value_tree) } - #check if the given constraint value exists for the specified categorical concept - constraintValuePath <- .getConstraintConcept(constraint$value, patientsetConstraints, studyConcepts, - identical.match = T, testIfEndLeave = F)[["conceptPath"]] - if(constraintValuePath != paste(constraint$conceptPath, constraint$value, "\\", sep = "")){ - stop(paste("Incorrect (sub)constraint definition for (sub)constraint:\'", .expressionToText(patientsetConstraints), - "\'.", "\nThe constraint value \'", constraint$value,"\' does not seem to be an existing value ", - "of the categorical concept \'", constraint$concept, "\'.", - "\nConcept path: ", constraint$conceptPath,"\nPath to contstraint value: ", - constraintValuePath, sep= "")) + if(constraint$conceptType == "CATEGORICAL_NODE" ){ + + #check if supplied constraint value is character + if(!is.character(constraint$value)){ + warning(paste("The supplied constraint value ", constraint$value," is not of class \'character\', while concept ", + constraint$conceptPath, " is a categorical concept (ie. containing text).", + "\n(This was the concept selected based on the input: \'", + constraint$concept, "\')", "\nWill convert the value to character, but unless there is actually a ", + "categorical value that matches the constraint value, this will result in an error later on.", + "\nEncountered in (sub)constraint: ",.expressionToText(patientsetConstraints), + sep = "" )) + constraint$value <- as.character(constraint$value) + } + + #check if the given constraint value exists for the specified categorical concept + constraintValuePath <- .getConstraintConcept(constraint$value, patientsetConstraints, studyConcepts, + identical.match = T, testIfEndLeave = F)[["conceptPath"]] + if(constraintValuePath != paste(constraint$conceptPath, constraint$value, "\\", sep = "")){ + stop(paste("Incorrect (sub)constraint definition for (sub)constraint:\'", .expressionToText(patientsetConstraints), + "\'.", "\nThe constraint value \'", constraint$value,"\' does not seem to be an existing value ", + "of the categorical concept \'", constraint$concept, "\'.", + "\nConcept path: ", constraint$conceptPath,"\nPath to contstraint value: ", + constraintValuePath, sep= "")) + } + + #translate relational operator from R to a value operator that can be used in the query + #only EQ and NE are possible for text variables. Only EQ is supported right now + constraint$value_operator <- .getValueOperator(constraint$operator, "CATEGORICAL_NODE") + + # construct the "item" subtree for the current constraint + if(constraint$value_operator == "EQ"){ + itemXMLtree <- xmlNode("item", xmlNode("item_key", .makeItemKey(constraintValuePath))) + } + if(constraint$value_operator == "NE"){ + stop("For now the '!=' operation is not supported for categorical values") + ##implement later? So that if you specify conceptX != A then it automatically selects all possible categorical + # values in conceptX, except A. (you can't just use invert=1, for example trial_group != control | x < 1) + # or trial_group != control | lung_abnormal == "YES" should work too) + } } - #translate relational operator from R to a value operator that can be used in the query - #only EQ and NE are possible for text variables. Only EQ is supported right now - constraint$value_operator <- .getValueOperator(constraint$operator, "CATEGORICAL_NODE") - - # construct the "item" subtree for the current constraint - if(constraint$value_operator == "EQ"){ - itemXMLtree <- xmlNode("item", xmlNode("item_key", .makeItemKey(constraintValuePath))) + if(constraint$conceptType == "HIGH_DIMENSIONAL"){ + # you cannot apply relational operations to the high dimensional node + stop(paste("Incorrect use of a high dimensional data node in (sub)constraint: ", + .expressionToText(patientsetConstraints),".", + "\nHigh dimensional nodes can only be used for defining patient sets by supplying the node name ", + "alone (e.g. \"mRNA day1\"); it is not possible to apply a relational operation (such as \"mRNA day1 < 0\")", + " to the node. \nWhen a high dimensional node name is supplied, ", + "all patients that have data for that high dimensional node will be selected.", sep = "")) } - if(constraint$value_operator == "NE"){ - stop("For now the '!=' operation is not supported for categorical values") - ##implement later? So that if you specify conceptX != A then it automatically selects all possible categorical - # values in conceptX, except A. (you can't just use invert=1, for example trial_group != control | x < 1) - # or trial_group != control | lung_abnormal == "YES" should work too) + + if(is.na(constraint$value_operator)){ + stop(paste("Could not determine which value_operator to use in the query definition for the constraint \'", + .expressionToText(patientsetConstraints), "\'. Operator supplied by user: ", constraint$operator, + sep = "" )) } - } - - if(constraint$conceptType == "HIGH_DIMENSIONAL"){ - # you cannot apply relational operations to the high dimensional node - stop(paste("Incorrect use of a high dimensional data node in (sub)constraint: ", - .expressionToText(patientsetConstraints),".", - "\nHigh dimensional nodes can only be used for defining patient sets by supplying the node name ", - "alone (e.g. \"mRNA day1\"); it is not possible to apply a relational operation (such as \"mRNA day1 < 0\")", - " to the node. \nWhen a high dimensional node name is supplied, ", - "all patients that have data for that high dimensional node will be selected.", sep = "")) - } - - if(is.na(constraint$value_operator)){ - stop(paste("Could not determine which value_operator to use in the query definition for the constraint \'", - .expressionToText(patientsetConstraints), "\'. Operator supplied by user: ", constraint$operator, - sep = "" )) - } - - return(itemXMLtree) + + return(itemXMLtree) } @@ -559,198 +559,198 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = # \\Public Studies\Public Studies\Cell-line\Demographics\Age\ # \\Private Studies\Private Studies\Cell-line\Characteristics\Age\ .makeItemKey <- function(conceptPath){ - dimension <- strsplit(conceptPath, "\\\\")[[1]][2] #get first part of the concept path, that is either public or private study - - if(!dimension %in% c("Public Studies", "Private Studies")){ - stop("Could not determine the dimension for the item_key, that is used for the XML query")} - item_key <- paste("\\\\", dimension, conceptPath, sep = "") - return(item_key) + dimension <- strsplit(conceptPath, "\\\\")[[1]][2] #get first part of the concept path, that is either public or private study + + if(!dimension %in% c("Public Studies", "Private Studies")){ + stop("Could not determine the dimension for the item_key, that is used for the XML query")} + item_key <- paste("\\\\", dimension, conceptPath, sep = "") + return(item_key) } #translate relational operators to a text representation as is expected for the query .getValueOperator <- function(operator, type){ - if(type == "NUMERIC"){ - if(operator == "<"){return("LT")} - if(operator == "<="){return("LE")} - if(operator == ">"){return("GT")} - if(operator == ">="){return("GE")} - if(operator == "=="){return("EQ")} - if(operator == "!="){return("NE")} - } - - if(type == "CATEGORICAL_NODE"){ - if(operator %in% c("<", "<=", ">", ">=")){ - stop(paste("The operation \'", operator, "\' is not supported for text variables.", sep = ""))} - if(operator == "=="){return("EQ")} - if(operator == "!="){return("NE")} - } - - #if the function did not return yet, something went wrong. - stop(paste("Something went wrong with determining the value_operator to use for the query definition. Operator:", - operator,". Value type: ", type, sep = "")) + if(type == "NUMERIC"){ + if(operator == "<"){return("LT")} + if(operator == "<="){return("LE")} + if(operator == ">"){return("GT")} + if(operator == ">="){return("GE")} + if(operator == "=="){return("EQ")} + if(operator == "!="){return("NE")} + } + + if(type == "CATEGORICAL_NODE"){ + if(operator %in% c("<", "<=", ">", ">=")){ + stop(paste("The operation \'", operator, "\' is not supported for text variables.", sep = ""))} + if(operator == "=="){return("EQ")} + if(operator == "!="){return("NE")} + } + + #if the function did not return yet, something went wrong. + stop(paste("Something went wrong with determining the value_operator to use for the query definition. Operator:", + operator,". Value type: ", type, sep = "")) } # find the concept path for a given concept definition. Concept can be specified as pattern matching a # concept name, or as a partial/full concept path or link .getConstraintConcept <- function(concept, subconstraint, studyConcepts, identical.match = F, testIfEndLeave = T){ - info <- "Correct way to supply a concept (as part of a (sub)constraint) is: - either directly as a string, containing the concept name or path, - or indirectly as an object (variable) that contains a string with the concept name or path. - Supplying a concept link as found in the column \'api.link.observations.href\' of the data.frame retrieved by - getConcepts() should also work. - Example: if you want to select patients younger than 12, supply \"age\" directly as as string: \"age\" < 12 - or indirectly: concepts[2] < 12, where concepts[2] contains the string \"age\"." - - subconstraint <- .expressionToText(subconstraint) - - #if not string: get the value of the variable/object. Value should be one string. - if(class(concept) == "name" | class(concept) == "call"){ - result <- try(eval(concept, envir = globalenv()), silent = T) - if(class(result) == "try-error"){ - try_error <- attr(result, "condition")$message - err_message <- paste(try_error, ". Object was specified in subconstraint ", subconstraint, ".\n", info, sep = "") - stop(err_message) + info <- "Correct way to supply a concept (as part of a (sub)constraint) is: + either directly as a string, containing the concept name or path, + or indirectly as an object (variable) that contains a string with the concept name or path. + Supplying a concept link as found in the column \'api.link.observations.href\' of the data.frame retrieved by + getConcepts() should also work. + Example: if you want to select patients younger than 12, supply \"age\" directly as as string: \"age\" < 12 + or indirectly: concepts[2] < 12, where concepts[2] contains the string \"age\"." + + subconstraint <- .expressionToText(subconstraint) + + #if not string: get the value of the variable/object. Value should be one string. + if(class(concept) == "name" | class(concept) == "call"){ + result <- try(eval(concept, envir = globalenv()), silent = T) + if(class(result) == "try-error"){ + try_error <- attr(result, "condition")$message + err_message <- paste(try_error, ". Object was specified in subconstraint ", subconstraint, ".\n", info, sep = "") + stop(err_message) + } + if(length(result) >1){ + write(paste("The content of object: \'", .expressionToText(concept), "\' is:", sep = "" ),"") + print(result) + stop(paste("Incorrect input for concept specification in subconstraint: ", subconstraint, + ".\nObject length of \'", .expressionToText(concept), + "\' is larger than 1. Only a single string is allowed for specifying the concept.", + "The content of this variable is printed above this error message.", sep = "")) + } + if(is.na(result)){ + stop(paste("Content of \'",.expressionToText(concept), "\' is 'NA'.", + " Cannot use 'NA' as concept specification.", sep = "")) + } + concept <- result } - if(length(result) >1){ - write(paste("The content of object: \'", .expressionToText(concept), "\' is:", sep = "" ),"") - print(result) - stop(paste("Incorrect input for concept specification in subconstraint: ", subconstraint, - ".\nObject length of \'", .expressionToText(concept), - "\' is larger than 1. Only a single string is allowed for specifying the concept.", - "The content of this variable is printed above this error message.", sep = "")) + #concept should be a string. + if(!is.character(concept)){ + stop(paste("Incorrect input for concept specification in subconstraint: ", subconstraint, ".\n", info, sep = "")) } - if(is.na(result)){ - stop(paste("Content of \'",.expressionToText(concept), "\' is 'NA'.", - " Cannot use 'NA' as concept specification.", sep = "")) - } - concept <- result - } - #concept should be a string. - if(!is.character(concept)){ - stop(paste("Incorrect input for concept specification in subconstraint: ", subconstraint, ".\n", info, sep = "")) - } - - orig_concept <- concept - - if(identical.match) { - concept <- paste("^", concept, "$", sep = "") - concept <- gsub("^^", "^", concept, fixed = T) - concept <- gsub("$$", "$", concept, fixed = T) - } - - is.concept.path <- grepl("\\\\", concept) - conceptMatch <- character(0) - if(!is.concept.path){ - #concept paths are in 'fullName' column of getConcepts result - conceptMatch <- grep(concept, studyConcepts$name, ignore.case = !identical.match) - - if(length(conceptMatch) > 1){ - conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, concept_list = studyConcepts$name) + + orig_concept <- concept + + if(identical.match) { + concept <- paste("^", concept, "$", sep = "") + concept <- gsub("^^", "^", concept, fixed = T) + concept <- gsub("$$", "$", concept, fixed = T) } - } - - if(length(conceptMatch) == 0){ - # supplied concept migth be concept path or link. - is.concept.link <- grepl("^/studies/.+/concepts/", concept) | grepl("^studies/.+/concepts/", concept) - - if(is.concept.path & is.concept.link){stop( - paste( "Something went wrong with detecting whether the provided string \'", concept, - "\' is a concept path or concept link. Please check if the provided string is correct.", - "\nTo check this, you can look at the resulting data.frame of getConcepts(YOUR_STUDY_NAME).", - "\nThe concept paths that can be used for this study can be found in the \'fullName\' column,", - "and the concept links in the \'api.link.self.href\' column", - "If the string does have the correct format, you may have encountered a bug.", - "\nYou can help fix it by contacting us. Type ?transmartRClient for contact details.", sep = "")) - } - - if(is.concept.path){ - conceptMatch <- grep(concept, studyConcepts$fullName, fixed = T) - if(length(conceptMatch) > 1){ - conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, - concept_list = studyConcepts$fullName) - } + + is.concept.path <- grepl("\\\\", concept) + conceptMatch <- character(0) + if(!is.concept.path){ + #concept paths are in 'fullName' column of getConcepts result + conceptMatch <- grep(concept, studyConcepts$name, ignore.case = !identical.match) + + if(length(conceptMatch) > 1){ + conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, concept_list = studyConcepts$name) + } } - if(is.concept.link){ - message("\nDetecting a concept.link. Will attempt to find matching concept path.") - conceptMatch <- grep(concept, studyConcepts$api.link.self.href) - if(length(conceptMatch) > 1){ - conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, - concept_list = studyConcepts$api.link.self.href) - } + if(length(conceptMatch) == 0){ + # supplied concept migth be concept path or link. + is.concept.link <- grepl("^/studies/.+/concepts/", concept) | grepl("^studies/.+/concepts/", concept) + + if(is.concept.path & is.concept.link){stop( + paste( "Something went wrong with detecting whether the provided string \'", concept, + "\' is a concept path or concept link. Please check if the provided string is correct.", + "\nTo check this, you can look at the resulting data.frame of getConcepts(YOUR_STUDY_NAME).", + "\nThe concept paths that can be used for this study can be found in the \'fullName\' column,", + "and the concept links in the \'api.link.self.href\' column", + "If the string does have the correct format, you may have encountered a bug.", + "\nYou can help fix it by contacting us. Type ?transmartRClient for contact details.", sep = "")) + } + + if(is.concept.path){ + conceptMatch <- grep(concept, studyConcepts$fullName, fixed = T) + if(length(conceptMatch) > 1){ + conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, + concept_list = studyConcepts$fullName) + } + } + + if(is.concept.link){ + message("\nDetecting a concept.link. Will attempt to find matching concept path.") + conceptMatch <- grep(concept, studyConcepts$api.link.self.href) + if(length(conceptMatch) > 1){ + conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, + concept_list = studyConcepts$api.link.self.href) + } + } } - } - - identicalM <- "" - if(identical.match){identicalM <- "identical(literal) "} - if(length(conceptMatch) == 0){ - stop(paste("No ", identicalM, "matching concept or categorical value found in this study for \'", - orig_concept, "\', found in subconstraint: ", subconstraint, - "\nNote: The supplied concept in the constraint definition can be a full or partial ", - "match to the concept name, and can even contain regular expressions (pattern matching will be done as", - " done in the grep function, ignoring case) or it can be a concept.link or a concept.path.", - "\nIn case of a categorical concept; the value part of the constraint has to be a literal match to one", - " of the possible categorical values for that concept." , sep = "")) - } - - #test if matches are endLeaves, ie. a data node. - # If constraints are supplied in the form of {concept}{operator}{constraint_value}, the concept should be an end leave - # (ie. data node), either categorical or numerical, and if it's categorical it should be an end leave and not a - # categorical value. If only a concept is supplied as a constraint, it is possible to also use other concepts that - # are not end leaves, and high dimensional data nodes - in that case testIfEndLeave should be FALSE. - is.endLeaf <- studyConcepts$endLeaf[conceptMatch] == "YES" - - if(!is.endLeaf & testIfEndLeave){ - stop(paste("The supplied concept \'", concept, "\' is not a data node (ie. not an end leaf of the transmart tree).", - "The supplied concept name/path/link must point to a single numerical or categorical", - " data node (end leaf).", sep = "")) - } - - matched_concept = list(conceptPath = studyConcepts$fullName[conceptMatch], - conceptType = studyConcepts$type[conceptMatch]) - message(paste("\nMatched the concept \'", orig_concept, "\' in subconstraint \'", subconstraint, - "\'\n to concept (full path): \'", matched_concept$conceptPath, "\'\n", sep = "") ) - return(matched_concept) + + identicalM <- "" + if(identical.match){identicalM <- "identical(literal) "} + if(length(conceptMatch) == 0){ + stop(paste("No ", identicalM, "matching concept or categorical value found in this study for \'", + orig_concept, "\', found in subconstraint: ", subconstraint, + "\nNote: The supplied concept in the constraint definition can be a full or partial ", + "match to the concept name, and can even contain regular expressions (pattern matching will be done as", + " done in the grep function, ignoring case) or it can be a concept.link or a concept.path.", + "\nIn case of a categorical concept; the value part of the constraint has to be a literal match to one", + " of the possible categorical values for that concept." , sep = "")) + } + + #test if matches are endLeaves, ie. a data node. + # If constraints are supplied in the form of {concept}{operator}{constraint_value}, the concept should be an end leave + # (ie. data node), either categorical or numerical, and if it's categorical it should be an end leave and not a + # categorical value. If only a concept is supplied as a constraint, it is possible to also use other concepts that + # are not end leaves, and high dimensional data nodes - in that case testIfEndLeave should be FALSE. + is.endLeaf <- studyConcepts$endLeaf[conceptMatch] == "YES" + + if(!is.endLeaf & testIfEndLeave){ + stop(paste("The supplied concept \'", concept, "\' is not a data node (ie. not an end leaf of the transmart tree).", + "The supplied concept name/path/link must point to a single numerical or categorical", + " data node (end leaf).", sep = "")) + } + + matched_concept = list(conceptPath = studyConcepts$fullName[conceptMatch], + conceptType = studyConcepts$type[conceptMatch]) + message(paste("\nMatched the concept \'", orig_concept, "\' in subconstraint \'", subconstraint, + "\'\n to concept (full path): \'", matched_concept$conceptPath, "\'\n", sep = "") ) + return(matched_concept) } #called by .getConstraintConcept if there were initially multiple matches found for the concept, using the 'grep' function .selectMatch <- function(concept, matching_indices, concept_list){ - #any literal, full length matches? (ignoring case) - literalMatches <- tolower(concept_list[matching_indices]) == tolower(concept) - if(any(literalMatches)){ - matching_indices <- matching_indices[literalMatches] - if(length(matching_indices) > 1){ - stop(paste("There seem to be more than one concepts with the name \'", concept, "\'.", - "\nPlease use the concept path instead of the concept name to specify the concept.", - "(Hint: Concept paths can be found in the \'fullName\' column of the getConcepts() result).", sep = "")) + #any literal, full length matches? (ignoring case) + literalMatches <- tolower(concept_list[matching_indices]) == tolower(concept) + if(any(literalMatches)){ + matching_indices <- matching_indices[literalMatches] + if(length(matching_indices) > 1){ + stop(paste("There seem to be more than one concepts with the name \'", concept, "\'.", + "\nPlease use the concept path instead of the concept name to specify the concept.", + "(Hint: Concept paths can be found in the \'fullName\' column of the getConcepts() result).", sep = "")) + } + message(paste("\nMultiple matching concepts found for the string \'", concept, + "\'. One identical match was found (ignoring case): \'", + concept_list[matching_indices], "\'.\nThis match is selected.", + "\nFor more precise matching use full-length concept names, paths, or links,", + " and/or include beginning/end of string symbols (^/$) - see ?regexp.", + "Note: regexp can only be used for specifying concept names or links, not paths",sep = "")) + } + + #if not literal match take the shortest match + if(!any(literalMatches)){ + paths_tmp<- concept_list[matching_indices] + shortest_match<- matching_indices[which.min(nchar(paths_tmp))] + matching_indices<- shortest_match + message(paste("\nMultiple matching concepts found for the string \'", concept,"\', selecting shortest match: \'", + paste(concept_list[shortest_match], collapse = ","), "\'.", + "\nFor more precise matching use full-length names or paths,", + " and/or include beginning/end of string symbols (^/$) - see ?regexp", sep = "")) + if(length(matching_indices) > 1){ + stop(paste("There are multiple shortest matches for \'", concept, "\'. Matches: ", + paste(concept_list[shortest_match], collapse = ", "), ".", + "\nPlease use a more specific/longer string for specifying the concept name or path,", + "or use the (full) concept path instead of the concept name to specify the concept.", + "(Hint: Concept paths can be found in the \'fullName\' column of the getConcepts() result).", sep = "")) + } } - message(paste("\nMultiple matching concepts found for the string \'", concept, - "\'. One identical match was found (ignoring case): \'", - concept_list[matching_indices], "\'.\nThis match is selected.", - "\nFor more precise matching use full-length concept names, paths, or links,", - " and/or include beginning/end of string symbols (^/$) - see ?regexp.", - "Note: regexp can only be used for specifying concept names or links, not paths",sep = "")) - } - - #if not literal match take the shortest match - if(!any(literalMatches)){ - paths_tmp<- concept_list[matching_indices] - shortest_match<- matching_indices[which.min(nchar(paths_tmp))] - matching_indices<- shortest_match - message(paste("\nMultiple matching concepts found for the string \'", concept,"\', selecting shortest match: \'", - paste(concept_list[shortest_match], collapse = ","), "\'.", - "\nFor more precise matching use full-length names or paths,", - " and/or include beginning/end of string symbols (^/$) - see ?regexp", sep = "")) - if(length(matching_indices) > 1){ - stop(paste("There are multiple shortest matches for \'", concept, "\'. Matches: ", - paste(concept_list[shortest_match], collapse = ", "), ".", - "\nPlease use a more specific/longer string for specifying the concept name or path,", - "or use the (full) concept path instead of the concept name to specify the concept.", - "(Hint: Concept paths can be found in the \'fullName\' column of the getConcepts() result).", sep = "")) - } - } - return(matching_indices) + return(matching_indices) } From d56ade226bf4cff7a4d2c0ebcd456e9b68d2a69d Mon Sep 17 00:00:00 2001 From: Jan Kanis Date: Mon, 15 Aug 2016 14:28:33 +0200 Subject: [PATCH 12/26] Bugfix: Post body encoding encode should be unset (NULL) if we don't want httr to encode the body for us. --- R/RClientConnectionManager.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/RClientConnectionManager.R b/R/RClientConnectionManager.R index 5753df0..c9f7733 100644 --- a/R/RClientConnectionManager.R +++ b/R/RClientConnectionManager.R @@ -289,7 +289,7 @@ function(apiCall, httpHeaderFields, accept.type = "default", post.body = NULL, p body = post.body, add_headers(httpHeaderFields), authenticate(transmartClientEnv$client_id, transmartClientEnv$client_secret), - encode = if(post.content.type == 'form') 'form' else 'raw', + encode = if(post.content.type == 'form') 'form' else NULL, if(post.content.type != 'form') content_type(post.content.type), config(verbose = getOption("verbose"))) if (getOption("verbose")) { message("POST body:\n", .list2string(post.body), "\n") } From bbc15dd27f8f71bd0207ed209d95f727edcbd130 Mon Sep 17 00:00:00 2001 From: Rianne Jansen Date: Mon, 22 Aug 2016 14:42:13 +0200 Subject: [PATCH 13/26] Changed function name to createPatientSet --- R/{getPatientSetID.R => createPatientSet.R} | 44 ++++++++-------- ...PatientSetID.R => runitCreatePatientSet.R} | 51 ++++++++++--------- ...getPatientSetID.Rd => createPatientSet.Rd} | 30 +++++------ man/getHighdimData.Rd | 2 +- man/getObservations.Rd | 2 +- man/getPatientSet.Rd | 2 +- man/transmartRClient-package.Rd | 2 +- 7 files changed, 68 insertions(+), 65 deletions(-) rename R/{getPatientSetID.R => createPatientSet.R} (95%) rename inst/unittests/{runitGetPatientSetID.R => runitCreatePatientSet.R} (90%) rename man/{getPatientSetID.Rd => createPatientSet.Rd} (89%) diff --git a/R/getPatientSetID.R b/R/createPatientSet.R similarity index 95% rename from R/getPatientSetID.R rename to R/createPatientSet.R index 58098c1..960cb28 100644 --- a/R/getPatientSetID.R +++ b/R/createPatientSet.R @@ -26,17 +26,18 @@ # Patient.set constraints are provided as an expression in the shape of, for example, # (c1 | c2) & (c3|c4|c5) & c6 &... where c is either a constraint built up as {concept}{operator}{constraint_value} # (e.g. "age" < 60) or a reference to a concept (e.g. "age") -getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = F){ +createPatientSet <- function(study.name, patientset.constraints, returnXMLquery = F){ if(missing(study.name)){stop("Provide study name")} if(missing(patientset.constraints)){stop("Provide patientset.constraints")} message("\nProcessing input...", "") # retrieve the expression that defines the constraints - patientset.constraints <- substitute(patientset.constraints) #needs to be like this, with possible later evaluation in - # parsePatientsetConstraints because otherwise things such as "age"<65 & "biomarker data" - # will result in an error (problem is the string without operator) if you - # try e.g. is.call or is.character on the input - # if constraints are supplied as string, try to parse the string + patientset.constraints <- substitute(patientset.constraints) #needs to be like this, with possible later evaluation + # in parsePatientsetConstraints because otherwise things such as + #"age"<65 & "biomarker data" will result in an error (problem is the + # string without operator) if you try e.g. is.call or is.character + # on the input if constraints are supplied as string, try to parse + # the string patientset.constraints <- .checkPatientSetConstraints(patientset.constraints) @@ -65,8 +66,8 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = input_patientset.constraints = .expressionToText(patientset.constraints), finalQueryConstraints = hrConstraints) - message(paste("\nBased on the input, the following constraints were defined and sent to the server (always includes study concept):\n", - result$finalQueryConstraints, sep = ""), "") + message(paste("\nBased on the input, the following constraints were defined and sent to the server", + " (always includes study concept):\n", result$finalQueryConstraints, sep = ""), "") if(returnXMLquery){result[["xmlQuery"]] <- xmlQuery} return(result) } @@ -87,7 +88,8 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = } if(length(patientsetConstraintsParsed) > 1){ message(paste("\nDetecting a string as input for patient set constraints - expected is an expression,", - "such as: \"age\" > 65. \nWill attempt to parse the constraints out of the string, converting it", + "such as: \"age\" > 65.", + "\nWill attempt to parse the constraints out of the string, converting it", "into an expression...")) patientsetConstraints <- patientsetConstraintsParsed } @@ -187,9 +189,9 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = # if length(patientsetConstraints) == 3, then the expression contains three elements, so it is either a low-level # constraint of the form {concept}{constraint_operator}{constraint_value} or it is a concatenation of constraints # separated by either an AND or OR operator (of form {some constraint(s)}{ &, &&, | or || }{some constraint(s)} ) - # alternatively it is an expression containing the call to substitute() or an object with index, e.g. variable[1], data.frame$firstColumn[firstRow],etc - # contain a string specifying a concept, or a string that in itself is a constraint definition, or an expression as - # created with subsitute for specifying a constraint . + # alternatively it is an expression containing the call to substitute() or an object with index, e.g. variable[1], + # data.frame$firstColumn[firstRow],etc + if(is.symbol(patientsetConstraints)){ firstElement_in_allowedOperators <- F @@ -215,14 +217,14 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = return(itemXMLlist) }else{ # it's a concatenation of constraints: call function again on the subconstraints. - # right now it only supports the format where the & operators are always the highest level operators and the | - # operators are only used as lowest level, forcing the format: (c1|c2)&c3&(c4|c5|c6|...)& ... + # right now it only supports the format where the & operators are always the highest level operators and the + # | operators are only used as lowest level, forcing the format: (c1|c2)&c3&(c4|c5|c6|...)& ... treeBeforeOperator <- .parsePatientSetConstraints(patientsetConstraints[[2]], studyConcepts) treeAfterOperator <- .parsePatientSetConstraints(patientsetConstraints[[3]], studyConcepts) - #if there is an "OR" operation inbetween two subconstraints, the combination of those two subconstraints cannot - # have an & anymore (this is for forcing the strict format for constraint definition described above) + # if there is an "OR" operation inbetween two subconstraints, the combination of those two subconstraints + # cannot have an & anymore (this is for forcing the strict format for constraint definition described above) if(constraintOperator == "|" ) { if(grepl("&", .expressionToText(patientsetConstraints))){ stop(paste("Wrong format of (sub)constraint definition. Found in (sub)constraint: ", @@ -279,12 +281,12 @@ getPatientSetID <- function(study.name, patientset.constraints, returnXMLquery = }else if(class(patientsetConstraints) == "name" | class(patientsetConstraints) == "call"){ # alternatively it is an expression containing a call to substite or an object (with or without index), # e.g. variables[1] or variable, data.frame$firstColumn[firstRow],etc - this object can - # contain a string specifying a concept, or a string that in itself is a constraint definition, or an expression as - # created with subsitute for specifying a constraint . - # e.g. when following input is given: concepts<- c("age", "sex"); getPatientSetID("Some study", concepts[1]) or - # tmp <- c(substitute("age" <65), substitute("sex"== "female")); getPatientSetID("Some study", tmp[[1]]) + # contain a string specifying a concept, or a string that in itself is a constraint definition, or an + # expression as created with subsitute for specifying a constraint . + # e.g. when following input is given: concepts<- c("age", "sex"); createPatientSet("Some study", concepts[1]) or + # tmp <- c(substitute("age" <65), substitute("sex"== "female")); createPatientSet("Some study", tmp[[1]]) # or an object with strings specifying the constraints, and then the strings shoudl be turned into expressions too. - # tmp <- c("\"age\" <65", "\"sex\"== \"female\""); getPatientSetID("Some study", tmp[1]) + # tmp <- c("\"age\" <65", "\"sex\"== \"female\""); createPatientSet("Some study", tmp[1]) # try to evaluate the expression and find a matching constraint concept result <- try(eval(patientsetConstraints, envir = globalenv()), silent = T) if(class(result) == "try-error"){ diff --git a/inst/unittests/runitGetPatientSetID.R b/inst/unittests/runitCreatePatientSet.R similarity index 90% rename from inst/unittests/runitGetPatientSetID.R rename to inst/unittests/runitCreatePatientSet.R index 743c1be..e13f10b 100644 --- a/inst/unittests/runitGetPatientSetID.R +++ b/inst/unittests/runitCreatePatientSet.R @@ -31,7 +31,8 @@ gseconcepts <- read.table(gse8581conceptsLocation, header = T, stringsAsFactors ### unit tests for function .checkPatientSetConstraints ### # this should convert a string to an expression, if the constraints are # provided as a string. Also checks that it only contains one string -#both "\"age\"" and "age" as input should result in returning "age" # suggest to use variable name without the quotes if variable with name age should be used +# both "\"age\"" and "age" as input should result in returning "age" # suggest to use variable name without the quotes +# if variable with name age should be used test.checkPatientSetConstraints.simpleString.1 <- function() { result<- transmartRClient:::.checkPatientSetConstraints("\"age\"") checkEquals("age", result) @@ -197,7 +198,7 @@ test.buildXMLquery.simpleConstraintContainingObjects<- function() { checkIdentical(expected, xmlQueryText) } -#constraints with double substitute (in getPatientSetID another substitute is performed on the input, so if input is +#constraints with double substitute (in createPatientSet another substitute is performed on the input, so if input is # substitute("age"<65), then input for buildXMLquery is substitute(substitute("age"<65)) test.buildXMLquery.doubleSubstitute<- function() { concepts <- c("age", "sex") @@ -258,34 +259,34 @@ test.buildXMLquery.objectWithSubstitute<- function() { #may also be useful for testing (need to connect to a database that has both the clinical and high dim data of gse8581): -# getPatientSetID("GSE8581", "Age") #works on transmart-dev. -# getPatientSetID("GSE8581", "Subjects") #works on transmart-dev. -# getPatientSetID("GSE8581", "control") #works on transmart-dev. -# getPatientSetID("GSE8581", "Nonsense") #isn't supposed to work -# getPatientSetID("GSE8581", "/studies/gse8581/concepts/Subjects/Age%20%28year%29") #works on transmart-dev -# getPatientSetID("GSE8581", "/studies/gse8581/concepts/Subjects/Age%20%28year%29") #works on transmart-dev -# getPatientSetID("GSE8581", "age" < 65) #works on transmart-dev. -# getPatientSetID("GSE8581", "sex" == "unknown") #isn't supposed to work -# getPatientSetID("GSE8581", "lung" < 65) #isn't supposed to work -# getPatientSetID("GSE8581", "Subjects" < 65) #isn't supposed to work -# getPatientSetID("GSE8581", "age" < 65 | "sex" == "female") #works on transmart-dev. -# getPatientSetID("GSE8581", "age" < 65 & "sex" == "female") #works on transmart-dev. -# getPatientSetID("GSE8581", "age" < 65 & ("lung disease" == "control" | "lung disease" == "chronic obstructive pulmonary disease") & "Biomarker_Data") -# getPatientSetID("GSE8581","sex"== "female" | ("age" < 65 & "Biomarker_Data")) -# getPatientSetID("GSE8581","sex"== "female" & "age" < 65 | "Biomarker_Data") +# createPatientSet("GSE8581", "Age") #works on transmart-dev. +# createPatientSet("GSE8581", "Subjects") #works on transmart-dev. +# createPatientSet("GSE8581", "control") #works on transmart-dev. +# createPatientSet("GSE8581", "Nonsense") #isn't supposed to work +# createPatientSet("GSE8581", "/studies/gse8581/concepts/Subjects/Age%20%28year%29") #works on transmart-dev +# createPatientSet("GSE8581", "/studies/gse8581/concepts/Subjects/Age%20%28year%29") #works on transmart-dev +# createPatientSet("GSE8581", "age" < 65) #works on transmart-dev. +# createPatientSet("GSE8581", "sex" == "unknown") #isn't supposed to work +# createPatientSet("GSE8581", "lung" < 65) #isn't supposed to work +# createPatientSet("GSE8581", "Subjects" < 65) #isn't supposed to work +# createPatientSet("GSE8581", "age" < 65 | "sex" == "female") #works on transmart-dev. +# createPatientSet("GSE8581", "age" < 65 & "sex" == "female") #works on transmart-dev. +# createPatientSet("GSE8581", "age" < 65 & ("lung disease" == "control" | "lung disease" == "chronic obstructive pulmonary disease") & "Biomarker_Data") +# createPatientSet("GSE8581","sex"== "female" | ("age" < 65 & "Biomarker_Data")) +# createPatientSet("GSE8581","sex"== "female" & "age" < 65 | "Biomarker_Data") # concepts <- c("age", "sex") -# getPatientSetID("GSE8581", concepts[1] < some_value) #works on transmart-dev. -# getPatientSetID("GSE8581", substitute(concepts[1] < some_value[1])) #works on transmart-dev. +# createPatientSet("GSE8581", concepts[1] < some_value) #works on transmart-dev. +# createPatientSet("GSE8581", substitute(concepts[1] < some_value[1])) #works on transmart-dev. # constraint <- "concepts[1] < 65" -# getPatientSetID("GSE8581",constraint[1]) #works on transmart-dev. +# createPatientSet("GSE8581",constraint[1]) #works on transmart-dev. # constraint <- "age" -# getPatientSetID("GSE8581",constraint) #works on transmart-dev. +# createPatientSet("GSE8581",constraint) #works on transmart-dev. # constraints <- "age" -# getPatientSetID("GSE8581",constraints[2]) #shouldn't work -# getPatientSetID("GSE8581",constraints[2] < 65) #shouldn't work +# createPatientSet("GSE8581",constraints[2]) #shouldn't work +# createPatientSet("GSE8581",constraints[2] < 65) #shouldn't work # tmp <- c(substitute("age" <65), substitute("sex"== "female")) -# getPatientSetID("GSE8581",tmp[1]) #works on transmart-dev +# createPatientSet("GSE8581",tmp[1]) #works on transmart-dev # concepts <- c("age", "sex") # constraint <- "concepts[1] < 65" -# getPatientSetID("gse8581", "concepts[1] < 65") +# createPatientSet("gse8581", "concepts[1] < 65") diff --git a/man/getPatientSetID.Rd b/man/createPatientSet.Rd similarity index 89% rename from man/getPatientSetID.Rd rename to man/createPatientSet.Rd index 647b7de..d67a7bb 100644 --- a/man/getPatientSetID.Rd +++ b/man/createPatientSet.Rd @@ -1,5 +1,5 @@ -\name{getPatientSetID} -\alias{getPatientSetID} +\name{createPatientSet} +\alias{createPatientSet} \title{ Define a patient set based on a series of constraints and retrieve the patient.set ID @@ -14,7 +14,7 @@ called "finalQueryConstraints"). Optionally, the body of the POST request can be the query definition in XML format as it is sent to tranSMART. } \usage{ -getPatientSetID(study.name, patientset.constraints, returnXMLquery = FALSE) +createPatientSet(study.name, patientset.constraints, returnXMLquery = FALSE) } \arguments{ @@ -55,10 +55,10 @@ Example: "sex"== "female" & ("age" > 65 | "blood_pressure" > 140) & ("diagnosis" == "diabetes" | "diagnosis" == "prediabetic") See also \code{\link{substitute}} for creating expressions in case you want to store the expressions in a variable - before calling getPatientSetID. E.g. \code{getPatientSetID("SOME STUDY", "age" < 65)}, will have the same result as: - \code{my_expression <- substitute("age" < 65) ; getPatientSetID("SOME STUDY", my_expression)}. Alternatively, the - constraints can also be given as a string (\code{getPatientSetID("SOME STUDY", "\"age\" < 65")} or - \code{age_concept <- "age"; getPatientSetID("SOME STUDY", "age_concept < 65") }), but this might not be fully + before calling createPatientSet. E.g. \code{createPatientSet("SOME STUDY", "age" < 65)}, will have the same result as: + \code{my_expression <- substitute("age" < 65) ; createPatientSet("SOME STUDY", my_expression)}. Alternatively, the + constraints can also be given as a string (\code{createPatientSet("SOME STUDY", "\"age\" < 65")} or + \code{age_concept <- "age"; createPatientSet("SOME STUDY", "age_concept < 65") }), but this might not be fully supported. If the constraints are supplied as a single string, things that should be interpreted as text should be quoted. This applies for example to concept names/paths/links and the categorical values (e.g. "\"sex\" == \"Male\""). Else this will be interpreted as a variable name and the function will try to find a variable with that name in the @@ -127,36 +127,36 @@ haven't connected to the server yet, establish a connection using the \code{\lin ## create patient.set #selecting all patients with a value for concept "age" - getPatientSetID("GSE8581", "age") + createPatientSet("GSE8581", "age") #selecting all patients with "age" < 65 - getPatientSetID("GSE8581", "age" < 65) + createPatientSet("GSE8581", "age" < 65) #or: my_concepts <- c("Age", "Sex", "Lung Disease") constraint_value <- 65 - getPatientSetID("GSE8581", my_concepts[1] < constraint_value) + createPatientSet("GSE8581", my_concepts[1] < constraint_value) #multiple constraints can be combined: - getPatientSetID("GSE8581", "Age" < 65 & "Sex" == "female" & ("Lung Disease" == "chronic obstructive pulmonary disease" | + createPatientSet("GSE8581", "Age" < 65 & "Sex" == "female" & ("Lung Disease" == "chronic obstructive pulmonary disease" | "Lung Disease" == "control")) # there are multiple ways the patient.set constraints can be supplied. The following will have the same result: # 1 as expression - getPatientSetID("GSE8581", "age" < 65) + createPatientSet("GSE8581", "age" < 65) # 2 as an object (variable) containing a single expression my_expression <- substitute("age"< 65) - getPatientSetID("GSE8581", my_expression) + createPatientSet("GSE8581", my_expression) #3 as string. # supplying concept name as string: - getPatientSetID("GSE8581", "\"age\" < 65") + createPatientSet("GSE8581", "\"age\" < 65") # or if concept name is stored in an object (variable): age_concept<- "age" - getPatientSetID("GSE8581", "age_concept < 65") + createPatientSet("GSE8581", "age_concept < 65") } diff --git a/man/getHighdimData.Rd b/man/getHighdimData.Rd index 9c4d7ee..a72fcfc 100644 --- a/man/getHighdimData.Rd +++ b/man/getHighdimData.Rd @@ -91,7 +91,7 @@ If no projection is specified this function returns a list of the projections av \author{Tim Dorscheidt, Jan Kanis, Rianne Jansen. Contact: development@thehyve.nl} \note{To be able to access a transmart database, you need to be connected to the server the database is on. If you haven't connected to the server yet, establish a connection using the \code{\link{connectToTransmart}} function.} -\seealso{\code{\link{hash}, \link{highdimInfo}, \link{getStudies}, \link{getConcepts}}, \link{getPatientSetID}.} +\seealso{\code{\link{hash}, \link{highdimInfo}, \link{getStudies}, \link{getConcepts}}, \link{createPatientSet}.} \examples{ \dontrun{ diff --git a/man/getObservations.Rd b/man/getObservations.Rd index 3c9f9b3..a867310 100644 --- a/man/getObservations.Rd +++ b/man/getObservations.Rd @@ -31,7 +31,7 @@ getObservations(study.name, concept.match = NULL, concept.links = NULL, as.data. \references{} \author{Tim Dorscheidt, Jan Kanis, Rianne Jansen. Contact: development@thehyve.nl} \note{To be able to access a transmart database, you need to be connected to the server the database is on. If you haven't connected to the server yet, establish a connection using the \code{\link{connectToTransmart}} function.} -\seealso{\code{\link{getStudies}, \link{getConcepts}, \link{getPatientSetID}}} +\seealso{\code{\link{getStudies}, \link{getConcepts}, \link{createPatientSet}}} \examples{ \dontrun{ # The following will retrieve a list with observations for the study "foo" diff --git a/man/getPatientSet.Rd b/man/getPatientSet.Rd index a7f29f0..27ddeb3 100644 --- a/man/getPatientSet.Rd +++ b/man/getPatientSet.Rd @@ -13,7 +13,7 @@ getPatientSet(id) \arguments{ \item{id}{an integral number, the id of the patient set} \details{ - The function will return a named list with properties of the patient set. Patient sets are created in the Transmart web interface or with the \code{\link{getPatientSetId}} call (still to be implemented). Currently there is no support in the legacy Transmart web interface to view the id of a patient set, but the new web app will support that. + The function will return a named list with properties of the patient set. Patient sets are created in the Transmart web interface or with the \code{\link{createPatientSet}} call (still to be implemented). Currently there is no support in the legacy Transmart web interface to view the id of a patient set, but the new web app will support that. } } \value{ diff --git a/man/transmartRClient-package.Rd b/man/transmartRClient-package.Rd index 8564ebd..d05550e 100644 --- a/man/transmartRClient-package.Rd +++ b/man/transmartRClient-package.Rd @@ -23,7 +23,7 @@ The following functions are available in the package: \code{\link{getHighdimData}}\cr \code{\link{highdimInfo}}\cr \code{\link{getPatientSet}}\cr - \code{\link{getPatientSetID}}\cr + \code{\link{createPatientSet}}\cr } } From 41f79954f5febadff45d97c1f2ae7bc389d3d955 Mon Sep 17 00:00:00 2001 From: Rianne Jansen Date: Mon, 22 Aug 2016 17:39:39 +0200 Subject: [PATCH 14/26] Quick implementation to use patient.set ID in getObservations --- R/getObservations.R | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/R/getObservations.R b/R/getObservations.R index 29ac5a3..3a27923 100644 --- a/R/getObservations.R +++ b/R/getObservations.R @@ -22,7 +22,8 @@ # You should have received a copy of the GNU General Public License along # with this program. If not, see . -getObservations <- function(study.name, concept.match = NULL, concept.links = NULL, as.data.frame = TRUE) { +getObservations <- function(study.name, concept.match = NULL, concept.links = NULL, as.data.frame = TRUE, + patient.set = NULL) { .ensureTransmartConnection() if (is.null(concept.links)) { @@ -50,10 +51,31 @@ getObservations <- function(study.name, concept.match = NULL, concept.links = NU listOfObservations <- list() - for (oneLink in concept.links) { + if(is.null(patient.set)){ + for (oneLink in concept.links) { serverResult <- .transmartGetJSON(paste(oneLink, "/observations", sep = "")) listOfObservations <- c(listOfObservations, serverResult$observations) + } + }else{ + if(length(patient.set)>1){stop("Only one patient.set ID allowed as input")} + if(!is.numeric(patient.set)){stop("Patient.set ID should be a numeric value")} + + if(length(concept.links) == 1 & concept.links[1] == paste("/studies/", study.name, sep = "")){ + tmpConceptPath<- studyConcepts$fullName[1] + fullConceptNames <- gsub(paste(study.name,"\\\\.*", sep = ""),paste(study.name, "\\\\", sep = ""), + tmpConceptPath, ignore.case=T) + }else{ + fullConceptNames<-studyConcepts$fullName[match(concept.links, studyConcepts$api.link.self.href)] + } + for (oneName in fullConceptNames) { + serverResult <- .transmartGetJSON( + paste("/observations?patient_sets=", patient.set, + "&concept_paths=", URLencode(oneName), + sep="")) + listOfObservations <- c(listOfObservations, serverResult$observations) + } } + if (as.data.frame) { dataFrameObservations <- .listToDataFrame(listOfObservations) From 95084d0d6d0b6231ffffc28411bb483b82c34013 Mon Sep 17 00:00:00 2001 From: Rianne Jansen Date: Mon, 22 Aug 2016 17:45:34 +0200 Subject: [PATCH 15/26] updated manual --- R/getObservations.R | 4 ++-- man/getHighdimData.Rd | 3 ++- man/getObservations.Rd | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/R/getObservations.R b/R/getObservations.R index 3a27923..7bb8164 100644 --- a/R/getObservations.R +++ b/R/getObservations.R @@ -22,8 +22,8 @@ # You should have received a copy of the GNU General Public License along # with this program. If not, see . -getObservations <- function(study.name, concept.match = NULL, concept.links = NULL, as.data.frame = TRUE, - patient.set = NULL) { +getObservations <- function(study.name, concept.match = NULL, concept.links = NULL, patient.set = NULL, + as.data.frame = TRUE) { .ensureTransmartConnection() if (is.null(concept.links)) { diff --git a/man/getHighdimData.Rd b/man/getHighdimData.Rd index a72fcfc..e7db047 100644 --- a/man/getHighdimData.Rd +++ b/man/getHighdimData.Rd @@ -48,7 +48,8 @@ The remaining parameters are constraints that limit the amount of data that is r Assay constraints: \item{trial.name}{A single character string with the trial name.} - \item{patient.set}{A number indicating the patient set.} + \item{patient.set}{A number indicating the patient set, as created with \code{\link{createPatientSet}}. + It can be used to retrieve only the data for the patients that belong to that specific patient.set} \item{ontology.term}{A single character string containing the concept path.} \item{assay.ids}{A numeric vector containing the id's of the assays you want to retrieve.} \item{patient.ids}{A character vector with the patient ids that you want to retrieve.} diff --git a/man/getObservations.Rd b/man/getObservations.Rd index a867310..d95264f 100644 --- a/man/getObservations.Rd +++ b/man/getObservations.Rd @@ -8,12 +8,14 @@ study. A subset of observations can be selected by filtering by concept. } \usage{ -getObservations(study.name, concept.match = NULL, concept.links = NULL, as.data.frame = TRUE) +getObservations(study.name, concept.match = NULL, concept.links = NULL, patient.set = NULL, as.data.frame = TRUE) } \arguments{ \item{study.name}{a character string giving the name of a study.} \item{concept.match}{a character string or character vector containing the concept name(s) that should be matched. For each vector element, the \code{getObservations} function will search within the requested study for the first concept which name contains the given character string. It uses the name column of the result from \code{\link{getConcepts}} to perform the matching.} \item{concept.links}{ a character string or a character vector containing the link(s) pointing to the locations of the chosen concepts on the server. Candidate values for this argument can be found in the \code{api.link.self.href} column of the \code{\link{getConcepts}} results. It is the most exact way to refer to a concept, and it overwrites the \code{concept.match} argument.} + \item{patient.set}{ A number indicating the patient set, as created with \code{\link{createPatientSet}}. + It can be used to retrieve only the data for the patients that belong to that specific patient.set} \item{as.data.frame}{logical (default setting is TRUE): should the list containing the observation values be converted to a dataframe?} } \details{ From dd433ba8e705db56cc3b823ec668ca1f8ab3388a Mon Sep 17 00:00:00 2001 From: Jan Kanis Date: Mon, 29 Aug 2016 12:43:18 +0200 Subject: [PATCH 16/26] Fix style Still to be done: consistent indentation (1 tab). --- R/getObservations.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/getObservations.R b/R/getObservations.R index 7bb8164..029524d 100644 --- a/R/getObservations.R +++ b/R/getObservations.R @@ -40,7 +40,7 @@ getObservations <- function(study.name, concept.match = NULL, concept.links = NU } } } else { - concept.links <- paste("/studies/", study.name, sep = "") + concept.links <- paste0("/studies/", study.name) } } @@ -57,15 +57,15 @@ getObservations <- function(study.name, concept.match = NULL, concept.links = NU listOfObservations <- c(listOfObservations, serverResult$observations) } }else{ - if(length(patient.set)>1){stop("Only one patient.set ID allowed as input")} - if(!is.numeric(patient.set)){stop("Patient.set ID should be a numeric value")} + if(length(patient.set) > 1) { stop("Only one patient.set ID allowed as input") } + if(!is.numeric(patient.set)) { stop("Patient.set ID should be a numeric value") } - if(length(concept.links) == 1 & concept.links[1] == paste("/studies/", study.name, sep = "")){ + if(length(concept.links) == 1 && concept.links[1] == paste0("/studies/", study.name)) { tmpConceptPath<- studyConcepts$fullName[1] - fullConceptNames <- gsub(paste(study.name,"\\\\.*", sep = ""),paste(study.name, "\\\\", sep = ""), + fullConceptNames <- gsub(paste0(study.name,"\\\\.*"), paste0(study.name, "\\\\"), tmpConceptPath, ignore.case=T) - }else{ - fullConceptNames<-studyConcepts$fullName[match(concept.links, studyConcepts$api.link.self.href)] + } else { + fullConceptNames <- studyConcepts$fullName[match(concept.links, studyConcepts$api.link.self.href)] } for (oneName in fullConceptNames) { serverResult <- .transmartGetJSON( From dfc8ac7c69ab9a8a72d0168247b479ce7f57999b Mon Sep 17 00:00:00 2001 From: Rianne Jansen Date: Wed, 7 Sep 2016 15:49:05 +0200 Subject: [PATCH 17/26] Give error message when string input is of incorrect format --- R/createPatientSet.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/R/createPatientSet.R b/R/createPatientSet.R index 960cb28..549489e 100644 --- a/R/createPatientSet.R +++ b/R/createPatientSet.R @@ -80,7 +80,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery stop("Incorrect input for patient set constraints. Found multiple strings for defining the patient set constraints. The patient set constraints should be supplied in one single expression (or string).")} - try({patientsetConstraintsParsed <- parse(text = patientsetConstraints)[[1]] + result <- try({patientsetConstraintsParsed <- parse(text = patientsetConstraints)[[1]] if(length(patientsetConstraintsParsed) == 1){ if(is.character(patientsetConstraintsParsed)){ #e.g. happens if input string is "\"age\"" patientsetConstraints <- patientsetConstraintsParsed @@ -95,6 +95,14 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery } }, silent = T ) + if(class(result) == "try-error"){ + errorText <- paste("Detected a string as input for patient set constraints. Have tried to parse the", + "constraints out of the string to convert it into an expression, but the attempt to", + "parse the constraints out of the string failed:\n\n", + result[1], "\nPlease check the format of your input.", + "Type ?createPatientSet for more details on the expected format.") + stop(errorText) + } } return(patientsetConstraints) } From 0051171fe9189675c96436b17f28dcc3e00bfa36 Mon Sep 17 00:00:00 2001 From: Jan Kanis Date: Tue, 13 Sep 2016 17:19:32 +0200 Subject: [PATCH 18/26] some refactorings And some work in progress --- R/createPatientSet.R | 137 ++++++++++++------------- inst/unittests/runitCreatePatientSet.R | 6 +- 2 files changed, 71 insertions(+), 72 deletions(-) diff --git a/R/createPatientSet.R b/R/createPatientSet.R index 960cb28..4724ff6 100644 --- a/R/createPatientSet.R +++ b/R/createPatientSet.R @@ -46,7 +46,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # paths. studyConcepts <- getConcepts(study.name) studyConcepts <- studyConcepts[, c("name", "fullName", "type", "api.link.self.href")] - studyConcepts <- .findEndLeaves(studyConcepts) + studyConcepts <- .findEndLeaves(studyConcepts) # read the constraints given by the user, and convert this to a XML query definition in the format as expected by REST-API xmlQuery <- .buildXMLquery(patientset.constraints, studyConcepts, study.name) @@ -75,27 +75,27 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery .checkPatientSetConstraints <- function(patientsetConstraints){ #test if it is expression and not a string. If string: try to parse - if(is.character(patientsetConstraints)){ - if(length(patientsetConstraints) > 1){ - stop("Incorrect input for patient set constraints. Found multiple strings for defining the patient set constraints. - The patient set constraints should be supplied in one single expression (or string).")} - - try({patientsetConstraintsParsed <- parse(text = patientsetConstraints)[[1]] - if(length(patientsetConstraintsParsed) == 1){ - if(is.character(patientsetConstraintsParsed)){ #e.g. happens if input string is "\"age\"" - patientsetConstraints <- patientsetConstraintsParsed - } - } - if(length(patientsetConstraintsParsed) > 1){ - message(paste("\nDetecting a string as input for patient set constraints - expected is an expression,", - "such as: \"age\" > 65.", - "\nWill attempt to parse the constraints out of the string, converting it", - "into an expression...")) - patientsetConstraints <- patientsetConstraintsParsed - } - }, silent = T - ) + if(!is.character(patientsetConstraints)) { + return(patientsetConstraints) } + if(length(patientsetConstraints) > 1){ + stop("Incorrect input for patient set constraints. Found multiple strings for defining the patient set constraints. + The patient set constraints should be supplied in one single expression (or string).")} + + # TODO: is deze try nodig? + try({patientsetConstraintsParsed <- parse(text = patientsetConstraints)[[1]] + if(length(patientsetConstraintsParsed) == 1 && is.character(patientsetConstraintsParsed)) { + #e.g. happens if input string is "\"age\"" + patientsetConstraints <- patientsetConstraintsParsed + } + if(length(patientsetConstraintsParsed) > 1) { + message(paste("\nDetecting a string as input for patient set constraints - expected is an expression,", + "such as: \"age\" > 65.", + "\nWill attempt to parse the constraints out of the string, converting it", + "into an expression...")) + patientsetConstraints <- patientsetConstraintsParsed + } + }, silent = T) return(patientsetConstraints) } @@ -140,10 +140,10 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # concepts with type numeric and high_dimensional are end-leaves, # concepts with type categorical_options are not end-leaves - endLeaf <- "" + endLeaf <- logical() conceptListStudy <- cbind(conceptListStudy, endLeaf, stringsAsFactors = F) - conceptListStudy$endLeaf[conceptListStudy$type %in% c("NUMERIC", "HIGH_DIMENSIONAL")] <- "YES" - conceptListStudy$endLeaf[conceptListStudy$type == "CATEGORICAL_OPTION"] <- "NO" + conceptListStudy$endLeaf[conceptListStudy$type %in% c("NUMERIC", "HIGH_DIMENSIONAL")] <- T + conceptListStudy$endLeaf[conceptListStudy$type == "CATEGORICAL_OPTION"] <- F #find categorical data nodes, and set type of categorical end-leave (data node) to "CATEGORICAL_NODE" # concepts with 'type' categorical_option are the concept values. Take the concept path of the concept values and @@ -153,9 +153,9 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # containing the categorical value, to obtain path to categorical node categoricalNodes <- unique(categoricalNodes) - conceptListStudy$endLeaf[conceptListStudy$type == "UNKNOWN" & conceptListStudy$fullName %in% categoricalNodes] <- "YES" + conceptListStudy$endLeaf[conceptListStudy$type == "UNKNOWN" & conceptListStudy$fullName %in% categoricalNodes] <- T conceptListStudy$type[conceptListStudy$type == "UNKNOWN" & conceptListStudy$fullName %in% categoricalNodes] <- "CATEGORICAL_NODE" - conceptListStudy$endLeaf[conceptListStudy$type == "UNKNOWN" & !conceptListStudy$fullName %in% categoricalNodes] <- "NO" + conceptListStudy$endLeaf[conceptListStudy$type == "UNKNOWN" & !conceptListStudy$fullName %in% categoricalNodes] <- F return(conceptListStudy) } @@ -352,46 +352,45 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery parsedXML <- "" - if(all(names(xmlQuery)== "panel")){ - panels <- xmlChildren(xmlQuery) + # TODO: this test is not really needed + panels <- xmlChildren(xmlQuery) + + for(i in 1:length(panels)){ + panel <- panels[[i]] - for(i in 1:length(panels)){ - panel <- panels[[i]] - - if(i == 1){parsedXML <- paste(parsedXML, "( ", sep = "") #first panel - }else{parsedXML <- paste(parsedXML, "\n\n\t&\n\n( ", sep = "")} + if(i == 1){parsedXML <- paste(parsedXML, "( ", sep = "") #first panel + }else{parsedXML <- paste(parsedXML, "\n\n\t&\n\n( ", sep = "")} + + invert <- xmlValue(panel[["invert"]]) + if(invert == "1"){parsedXML <- paste(parsedXML, "!( ", sep = "") } + + #add the children + items <- xmlElementsByTagName(panel, "item") + for(j in 1:length(items)){ + item <- items[[j]] + if(j > 1){parsedXML <- paste(parsedXML, " | ", sep = "")} - invert <- xmlValue(panel[["invert"]]) - if(invert == "1"){parsedXML <- paste(parsedXML, "!( ", sep = "") } + #get concept path + item_key <- xmlValue(item[["item_key"]]) + concept_path <- gsub("\\\\\\\\Public Studies", "", item_key) + concept_path <- gsub("\\\\\\\\Private Studies", "", concept_path) + parsedXML <- paste(parsedXML, "\"", concept_path, "\"", sep = "") - #add the children - items <- xmlElementsByTagName(panel, "item") - for(j in 1:length(items)){ - item <- items[[j]] - if(j > 1){parsedXML <- paste(parsedXML, " | ", sep = "")} + #if constraint operator and constraint value are given, get these + childNames <- names(item) + if("constrain_by_value" %in% childNames){ + valueConstraints <- item[["constrain_by_value"]] + valueOperator <- xmlValue(valueConstraints[["value_operator"]]) + parsedXML <- paste(parsedXML, " ", valueOperator, " ", sep = "") + valueConstraint <- xmlValue(valueConstraints[["value_constraint"]]) + parsedXML <- paste(parsedXML, " ", valueConstraint, " ", sep = "") - #get concept path - item_key <- xmlValue(item[["item_key"]]) - concept_path <- gsub("\\\\\\\\Public Studies", "", item_key) - concept_path <- gsub("\\\\\\\\Private Studies", "", concept_path) - parsedXML <- paste(parsedXML, "\"", concept_path, "\"", sep = "") - - #if constraint operator and constraint value are given, get these - childNames <- names(item) - if("constrain_by_value" %in% childNames){ - valueConstraints <- item[["constrain_by_value"]] - valueOperator <- xmlValue(valueConstraints[["value_operator"]]) - parsedXML <- paste(parsedXML, " ", valueOperator, " ", sep = "") - valueConstraint <- xmlValue(valueConstraints[["value_constraint"]]) - parsedXML <- paste(parsedXML, " ", valueConstraint, " ", sep = "") - - } } - - #close brackets for panel - if(invert == "1"){parsedXML <- paste(parsedXML, " ))", sep = "") - }else{parsedXML <- paste(parsedXML, " )", sep = "") } } + + #close brackets for panel + if(invert == "1"){parsedXML <- paste(parsedXML, " ))", sep = "") + }else{parsedXML <- paste(parsedXML, " )", sep = "") } } if(parsedXML == ""){warning("Something went wrong with making a human readable version of the XML. This does not affect the formation of the patient set")} @@ -407,7 +406,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery splitPath <- strsplit(conceptPath, "\\\\")[[1]] nameHit <- grep(study.name, splitPath, ignore.case = T)[1] # take the first, just in case the study.name is repeated # in later part of path - studyPath <- paste(c(splitPath[1:nameHit], ""), collapse = "\\", sep = "") + studyPath <- paste0(c(splitPath[1:nameHit], ""), collapse = "\\") itemKey <- .makeItemKey(studyPath) panel <- xmlNode("panel", @@ -426,8 +425,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery for(i in 1:length(itemXMLtreeList)){ panel<- append.XMLNode(panel, itemXMLtreeList[[i]]) } - panel <- list(panel) - return(panel) + return(list(panel)) } # constraint is of format: {concept definition}{relational operator}{constraint_value}, e.g. "age" < 12. @@ -561,11 +559,12 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # \\Public Studies\Public Studies\Cell-line\Demographics\Age\ # \\Private Studies\Private Studies\Cell-line\Characteristics\Age\ .makeItemKey <- function(conceptPath){ - dimension <- strsplit(conceptPath, "\\\\")[[1]][2] #get first part of the concept path, that is either public or private study + dimension <- strsplit(conceptPath, "\\", fixed=T)[[1]][2] #get first part of the concept path, that is either public or private study - if(!dimension %in% c("Public Studies", "Private Studies")){ - stop("Could not determine the dimension for the item_key, that is used for the XML query")} - item_key <- paste("\\\\", dimension, conceptPath, sep = "") + # TODO: is dit nodig? + #if(!dimension %in% c("Public Studies", "Private Studies")){ + # stop("Could not determine the dimension for the item_key, that is used for the XML query")} + item_key <- paste0("\\\\", dimension, conceptPath) return(item_key) } @@ -702,9 +701,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # (ie. data node), either categorical or numerical, and if it's categorical it should be an end leave and not a # categorical value. If only a concept is supplied as a constraint, it is possible to also use other concepts that # are not end leaves, and high dimensional data nodes - in that case testIfEndLeave should be FALSE. - is.endLeaf <- studyConcepts$endLeaf[conceptMatch] == "YES" - - if(!is.endLeaf & testIfEndLeave){ + if(!studyConcepts$endLeaf[[conceptMatch]] & testIfEndLeave){ stop(paste("The supplied concept \'", concept, "\' is not a data node (ie. not an end leaf of the transmart tree).", "The supplied concept name/path/link must point to a single numerical or categorical", " data node (end leaf).", sep = "")) diff --git a/inst/unittests/runitCreatePatientSet.R b/inst/unittests/runitCreatePatientSet.R index e13f10b..7a7cb44 100644 --- a/inst/unittests/runitCreatePatientSet.R +++ b/inst/unittests/runitCreatePatientSet.R @@ -23,8 +23,10 @@ # with this program. If not, see . # concepts table for GSE8581 -gse8581conceptsLocation <- - system.file("unittests/resources/gse8581concepts.txt", package="transmartRClient") +self.location <- dirname(sys.frame(1)$ofile) +gse8581conceptsLocation <- paste0(self.location, "/resources/gse8581concepts.txt") +# system.file("unittests/resources/gse8581concepts.txt", package="transmartRClient") +#gse8581conceptsLocation <- "/home/jan/devel/transmart/RInterface/inst/unittests/resources/gse8581concepts.txt" gseconcepts <- read.table(gse8581conceptsLocation, header = T, stringsAsFactors = F, sep = "\t") From 1c71cbf76115e94065571a471eea5c3162b13bf2 Mon Sep 17 00:00:00 2001 From: Jan Kanis Date: Tue, 13 Sep 2016 17:30:12 +0200 Subject: [PATCH 19/26] :fish: fix tabs --- R/RClientConnectionManager.R | 4 ++-- R/createPatientSet.R | 28 ++++++++++++++-------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/R/RClientConnectionManager.R b/R/RClientConnectionManager.R index c9f7733..c09af38 100644 --- a/R/RClientConnectionManager.R +++ b/R/RClientConnectionManager.R @@ -267,7 +267,7 @@ function (oauthDomain = transmartClientEnv$transmartDomain, prefetched.request.t # Wrap this in case we need to change json libraries again .fromJSON <- function(json) { - fromJSON(json, simplifyDataFrame=F, simplifyMatrix=F) + fromJSON(json, simplifyDataFrame=F, simplifyMatrix=F) } .serverMessageExchange <- @@ -299,7 +299,7 @@ function(apiCall, httpHeaderFields, accept.type = "default", post.body = NULL, p result$headers <- headers(req) result$status <- req$status_code result$statusMessage <- http_status(req)$message - switch(.contentType(result$headers), + switch(.contentType(result$headers), json = { result$content <- .fromJSON(result$content) result$JSON <- TRUE diff --git a/R/createPatientSet.R b/R/createPatientSet.R index 886e64e..73b30e6 100644 --- a/R/createPatientSet.R +++ b/R/createPatientSet.R @@ -76,7 +76,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery .checkPatientSetConstraints <- function(patientsetConstraints){ #test if it is expression and not a string. If string: try to parse if(!is.character(patientsetConstraints)) { - return(patientsetConstraints) + return(patientsetConstraints) } if(length(patientsetConstraints) > 1){ stop("Incorrect input for patient set constraints. Found multiple strings for defining the patient set constraints. @@ -85,7 +85,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # TODO: is deze try nodig? result <- try({patientsetConstraintsParsed <- parse(text = patientsetConstraints)[[1]] if(length(patientsetConstraintsParsed) == 1 && is.character(patientsetConstraintsParsed)) { - #e.g. happens if input string is "\"age\"" + #e.g. happens if input string is "\"age\"" patientsetConstraints <- patientsetConstraintsParsed } if(length(patientsetConstraintsParsed) > 1) { @@ -139,11 +139,11 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery if( any(! conceptTypes %in% c("CATEGORICAL_OPTION", "NUMERIC", "UNKNOWN", "HIGH_DIMENSIONAL"))){ warning("Unexpected concept type for one or more concepts in the selected study. - Determination which concepts are end-leaves of the tree might not work correcty in all cases. - This only affects the patient selection query if concepts with undetermined type are included in the query. - In that case this message is followed by an accompanying error. - You can help fix it by contacting us. Type ?transmartRClient for contact details. - \n") + Determination which concepts are end-leaves of the tree might not work correcty in all cases. + This only affects the patient selection query if concepts with undetermined type are included in the query. + In that case this message is followed by an accompanying error. + You can help fix it by contacting us. Type ?transmartRClient for contact details. + \n") } # concepts with type numeric and high_dimensional are end-leaves, @@ -401,7 +401,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery }else{parsedXML <- paste(parsedXML, " )", sep = "") } } if(parsedXML == ""){warning("Something went wrong with making a human readable version of the XML. - This does not affect the formation of the patient set")} + This does not affect the formation of the patient set")} return(parsedXML) } @@ -605,12 +605,12 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # concept name, or as a partial/full concept path or link .getConstraintConcept <- function(concept, subconstraint, studyConcepts, identical.match = F, testIfEndLeave = T){ info <- "Correct way to supply a concept (as part of a (sub)constraint) is: - either directly as a string, containing the concept name or path, - or indirectly as an object (variable) that contains a string with the concept name or path. - Supplying a concept link as found in the column \'api.link.observations.href\' of the data.frame retrieved by - getConcepts() should also work. - Example: if you want to select patients younger than 12, supply \"age\" directly as as string: \"age\" < 12 - or indirectly: concepts[2] < 12, where concepts[2] contains the string \"age\"." + either directly as a string, containing the concept name or path, + or indirectly as an object (variable) that contains a string with the concept name or path. + Supplying a concept link as found in the column \'api.link.observations.href\' of the data.frame retrieved by + getConcepts() should also work. + Example: if you want to select patients younger than 12, supply \"age\" directly as as string: \"age\" < 12 + or indirectly: concepts[2] < 12, where concepts[2] contains the string \"age\"." subconstraint <- .expressionToText(subconstraint) From e78a309cc6b9a2a0448221218344c68b5afc559c Mon Sep 17 00:00:00 2001 From: Jan Kanis Date: Wed, 14 Sep 2016 16:38:45 +0200 Subject: [PATCH 20/26] script.dirname function for unittests --- inst/unittests/runitCreatePatientSet.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/inst/unittests/runitCreatePatientSet.R b/inst/unittests/runitCreatePatientSet.R index 7a7cb44..6828a3e 100644 --- a/inst/unittests/runitCreatePatientSet.R +++ b/inst/unittests/runitCreatePatientSet.R @@ -22,6 +22,17 @@ # You should have received a copy of the GNU General Public License along # with this program. If not, see . +script.dirname <- function() { + for (i in 0:-sys.nframe()) { + path <- sys.frame(i)$ofile + #message(paste0(i, ": ", path)) + if(!is.null(path)) return(dirname(path)) + } + stop("Directory name of running script not found") +} +self.location <- script.dirname() +#message(paste0("location: ", self.location)) + # concepts table for GSE8581 self.location <- dirname(sys.frame(1)$ofile) gse8581conceptsLocation <- paste0(self.location, "/resources/gse8581concepts.txt") From 894bd9614fa8ebe5fd532b8d634381de605e2ff4 Mon Sep 17 00:00:00 2001 From: Jan Kanis Date: Wed, 14 Sep 2016 16:50:51 +0200 Subject: [PATCH 21/26] TRANSMART_RINTERFACE_PKG_ROOT variable for testing in development script.dirname doesn't actually work within runit. --- inst/unittests/runitCreatePatientSet.R | 23 +++++++++-------------- tests/run-tests.R | 3 +++ 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/inst/unittests/runitCreatePatientSet.R b/inst/unittests/runitCreatePatientSet.R index 6828a3e..fd063d9 100644 --- a/inst/unittests/runitCreatePatientSet.R +++ b/inst/unittests/runitCreatePatientSet.R @@ -22,22 +22,17 @@ # You should have received a copy of the GNU General Public License along # with this program. If not, see . -script.dirname <- function() { - for (i in 0:-sys.nframe()) { - path <- sys.frame(i)$ofile - #message(paste0(i, ": ", path)) - if(!is.null(path)) return(dirname(path)) - } - stop("Directory name of running script not found") -} -self.location <- script.dirname() -#message(paste0("location: ", self.location)) # concepts table for GSE8581 -self.location <- dirname(sys.frame(1)$ofile) -gse8581conceptsLocation <- paste0(self.location, "/resources/gse8581concepts.txt") -# system.file("unittests/resources/gse8581concepts.txt", package="transmartRClient") -#gse8581conceptsLocation <- "/home/jan/devel/transmart/RInterface/inst/unittests/resources/gse8581concepts.txt" +gse8581conceptsLocation <- + if (exists("TRANSMART_RINTERFACE_PKG_ROOT")) { + # Development setting + message(paste0("Loading unit tests from ", TRANSMART_RINTERFACE_PKG_ROOT)) + paste0(TRANSMART_RINTERFACE_PKG_ROOT, "/inst/unittests/resources/gse8581concepts.txt") + } else { + # Assume the package is installed + system.file("unittests/resources/gse8581concepts.txt", package="transmartRClient") + } gseconcepts <- read.table(gse8581conceptsLocation, header = T, stringsAsFactors = F, sep = "\t") diff --git a/tests/run-tests.R b/tests/run-tests.R index eb2e9f9..eb01f7f 100644 --- a/tests/run-tests.R +++ b/tests/run-tests.R @@ -25,6 +25,9 @@ library('RUnit') require("transmartRClient") +# If you want to run these tests in development on a not installed version of this package, do: +# TRANSMART_RINTERFACE_PKG_ROOT <- "/path/to/transmart/RInterface" + unittestsLocation <- system.file("unittests", package="transmartRClient") test.suite <- defineTestSuite("highdimTests", From 676e7ffc32540a733c67eaa709fce9a31c6e6e08 Mon Sep 17 00:00:00 2001 From: Jan Kanis Date: Thu, 15 Sep 2016 11:01:21 +0200 Subject: [PATCH 22/26] Change gse8581concepts endLeaf from string to logical Used in unittests --- inst/unittests/resources/gse8581concepts.txt | 74 ++++++++++---------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/inst/unittests/resources/gse8581concepts.txt b/inst/unittests/resources/gse8581concepts.txt index 03b5fb4..8f4a287 100644 --- a/inst/unittests/resources/gse8581concepts.txt +++ b/inst/unittests/resources/gse8581concepts.txt @@ -1,38 +1,38 @@ name fullName type api.link.self.href endLeaf -Afro American \Public Studies\GSE8581\Subjects\Ethnicity\Afro American\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Ethnicity/Afro%20American NO -Age (year) \Public Studies\GSE8581\Subjects\Age (year)\ NUMERIC /studies/gse8581/concepts/Subjects/Age%20%28year%29 YES -Biomarker_Data \Public Studies\GSE8581\MRNA\Biomarker_Data\ UNKNOWN /studies/gse8581/concepts/MRNA/Biomarker_Data NO -carcinoid \Public Studies\GSE8581\Endpoints\Diagnosis\carcinoid\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/carcinoid NO -Caucasian \Public Studies\GSE8581\Subjects\Ethnicity\Caucasian\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Ethnicity/Caucasian NO -chronic obstructive pulmonary disease \Public Studies\GSE8581\Subjects\Lung Disease\chronic obstructive pulmonary disease\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Lung%20Disease/chronic%20obstructive%20pulmonary%20disease NO -control \Public Studies\GSE8581\Subjects\Lung Disease\control\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Lung%20Disease/control NO -Diagnosis \Public Studies\GSE8581\Endpoints\Diagnosis\ CATEGORICAL_NODE /studies/gse8581/concepts/Endpoints/Diagnosis YES -emphysema \Public Studies\GSE8581\Endpoints\Diagnosis\emphysema\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/emphysema NO -Endpoints \Public Studies\GSE8581\Endpoints\ UNKNOWN /studies/gse8581/concepts/Endpoints NO -Ethnicity \Public Studies\GSE8581\Subjects\Ethnicity\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Ethnicity YES -female \Public Studies\GSE8581\Subjects\Sex\female\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Sex/female NO -FEV1 \Public Studies\GSE8581\Endpoints\FEV1\ NUMERIC /studies/gse8581/concepts/Endpoints/FEV1 YES -Forced Expiratory Volume Ratio \Public Studies\GSE8581\Endpoints\Forced Expiratory Volume Ratio\ NUMERIC /studies/gse8581/concepts/Endpoints/Forced%20Expiratory%20Volume%20Ratio YES -giant bullae \Public Studies\GSE8581\Endpoints\Diagnosis\giant bullae\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/giant%20bullae NO -Giant Cell Tumor \Public Studies\GSE8581\Endpoints\Diagnosis\Giant Cell Tumor\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/Giant%20Cell%20Tumor NO -GPL570_BOGUS \Public Studies\GSE8581\MRNA\Biomarker_Data\GPL570_BOGUS\ UNKNOWN /studies/gse8581/concepts/MRNA/Biomarker_Data/GPL570_BOGUS NO -Height (inch) \Public Studies\GSE8581\Subjects\Height (inch)\ NUMERIC /studies/gse8581/concepts/Subjects/Height%20%28inch%29 YES -hematoma \Public Studies\GSE8581\Endpoints\Diagnosis\hematoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/hematoma NO -Homo sapiens \Public Studies\GSE8581\Subjects\Organism\Homo sapiens\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Organism/Homo%20sapiens NO -inflammation \Public Studies\GSE8581\Endpoints\Diagnosis\inflammation\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/inflammation NO -Lung \Public Studies\GSE8581\MRNA\Biomarker_Data\GPL570_BOGUS\Lung\ HIGH_DIMENSIONAL /studies/gse8581/concepts/MRNA/Biomarker_Data/GPL570_BOGUS/Lung YES -Lung Disease \Public Studies\GSE8581\Subjects\Lung Disease\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Lung%20Disease YES -lymphoma \Public Studies\GSE8581\Endpoints\Diagnosis\lymphoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/lymphoma NO -male \Public Studies\GSE8581\Subjects\Sex\male\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Sex/male NO -metastatic non-small cell adenocarcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\metastatic non-small cell adenocarcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/metastatic%20non-small%20cell%20adenocarcinoma NO -metastatic renal cell carcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\metastatic renal cell carcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/metastatic%20renal%20cell%20carcinoma NO -MRNA \Public Studies\GSE8581\MRNA\ UNKNOWN /studies/gse8581/concepts/MRNA NO -no malignancy \Public Studies\GSE8581\Endpoints\Diagnosis\no malignancy\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/no%20malignancy NO -non-small cell adenocarcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\non-small cell adenocarcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/non-small%20cell%20adenocarcinoma NO -non-small cell squamous cell carcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\non-small cell squamous cell carcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/non-small%20cell%20squamous%20cell%20carcinoma NO -not specified \Public Studies\GSE8581\Subjects\Lung Disease\not specified\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Lung%20Disease/not%20specified NO -NSC-Mixed \Public Studies\GSE8581\Endpoints\Diagnosis\NSC-Mixed\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/NSC-Mixed NO -Organism \Public Studies\GSE8581\Subjects\Organism\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Organism YES -Sex \Public Studies\GSE8581\Subjects\Sex\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Sex YES -Subjects \Public Studies\GSE8581\Subjects\ UNKNOWN /studies/gse8581/concepts/Subjects NO -Unknown \Public Studies\GSE8581\Endpoints\Diagnosis\Unknown\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/Unknown NO +Afro American \Public Studies\GSE8581\Subjects\Ethnicity\Afro American\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Ethnicity/Afro%20American FALSE +Age (year) \Public Studies\GSE8581\Subjects\Age (year)\ NUMERIC /studies/gse8581/concepts/Subjects/Age%20%28year%29 TRUE +Biomarker_Data \Public Studies\GSE8581\MRNA\Biomarker_Data\ UNKNOWN /studies/gse8581/concepts/MRNA/Biomarker_Data FALSE +carcinoid \Public Studies\GSE8581\Endpoints\Diagnosis\carcinoid\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/carcinoid FALSE +Caucasian \Public Studies\GSE8581\Subjects\Ethnicity\Caucasian\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Ethnicity/Caucasian FALSE +chronic obstructive pulmonary disease \Public Studies\GSE8581\Subjects\Lung Disease\chronic obstructive pulmonary disease\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Lung%20Disease/chronic%20obstructive%20pulmonary%20disease FALSE +control \Public Studies\GSE8581\Subjects\Lung Disease\control\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Lung%20Disease/control FALSE +Diagnosis \Public Studies\GSE8581\Endpoints\Diagnosis\ CATEGORICAL_NODE /studies/gse8581/concepts/Endpoints/Diagnosis TRUE +emphysema \Public Studies\GSE8581\Endpoints\Diagnosis\emphysema\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/emphysema FALSE +Endpoints \Public Studies\GSE8581\Endpoints\ UNKNOWN /studies/gse8581/concepts/Endpoints FALSE +Ethnicity \Public Studies\GSE8581\Subjects\Ethnicity\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Ethnicity TRUE +female \Public Studies\GSE8581\Subjects\Sex\female\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Sex/female FALSE +FEV1 \Public Studies\GSE8581\Endpoints\FEV1\ NUMERIC /studies/gse8581/concepts/Endpoints/FEV1 TRUE +Forced Expiratory Volume Ratio \Public Studies\GSE8581\Endpoints\Forced Expiratory Volume Ratio\ NUMERIC /studies/gse8581/concepts/Endpoints/Forced%20Expiratory%20Volume%20Ratio TRUE +giant bullae \Public Studies\GSE8581\Endpoints\Diagnosis\giant bullae\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/giant%20bullae FALSE +Giant Cell Tumor \Public Studies\GSE8581\Endpoints\Diagnosis\Giant Cell Tumor\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/Giant%20Cell%20Tumor FALSE +GPL570_BOGUS \Public Studies\GSE8581\MRNA\Biomarker_Data\GPL570_BOGUS\ UNKNOWN /studies/gse8581/concepts/MRNA/Biomarker_Data/GPL570_BOGUS FALSE +Height (inch) \Public Studies\GSE8581\Subjects\Height (inch)\ NUMERIC /studies/gse8581/concepts/Subjects/Height%20%28inch%29 TRUE +hematoma \Public Studies\GSE8581\Endpoints\Diagnosis\hematoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/hematoma FALSE +Homo sapiens \Public Studies\GSE8581\Subjects\Organism\Homo sapiens\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Organism/Homo%20sapiens FALSE +inflammation \Public Studies\GSE8581\Endpoints\Diagnosis\inflammation\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/inflammation FALSE +Lung \Public Studies\GSE8581\MRNA\Biomarker_Data\GPL570_BOGUS\Lung\ HIGH_DIMENSIONAL /studies/gse8581/concepts/MRNA/Biomarker_Data/GPL570_BOGUS/Lung TRUE +Lung Disease \Public Studies\GSE8581\Subjects\Lung Disease\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Lung%20Disease TRUE +lymphoma \Public Studies\GSE8581\Endpoints\Diagnosis\lymphoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/lymphoma FALSE +male \Public Studies\GSE8581\Subjects\Sex\male\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Sex/male FALSE +metastatic non-small cell adenocarcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\metastatic non-small cell adenocarcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/metastatic%20non-small%20cell%20adenocarcinoma FALSE +metastatic renal cell carcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\metastatic renal cell carcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/metastatic%20renal%20cell%20carcinoma FALSE +MRNA \Public Studies\GSE8581\MRNA\ UNKNOWN /studies/gse8581/concepts/MRNA FALSE +no malignancy \Public Studies\GSE8581\Endpoints\Diagnosis\no malignancy\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/no%20malignancy FALSE +non-small cell adenocarcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\non-small cell adenocarcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/non-small%20cell%20adenocarcinoma FALSE +non-small cell squamous cell carcinoma \Public Studies\GSE8581\Endpoints\Diagnosis\non-small cell squamous cell carcinoma\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/non-small%20cell%20squamous%20cell%20carcinoma FALSE +not specified \Public Studies\GSE8581\Subjects\Lung Disease\not specified\ CATEGORICAL_OPTION /studies/gse8581/concepts/Subjects/Lung%20Disease/not%20specified FALSE +NSC-Mixed \Public Studies\GSE8581\Endpoints\Diagnosis\NSC-Mixed\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/NSC-Mixed FALSE +Organism \Public Studies\GSE8581\Subjects\Organism\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Organism TRUE +Sex \Public Studies\GSE8581\Subjects\Sex\ CATEGORICAL_NODE /studies/gse8581/concepts/Subjects/Sex TRUE +Subjects \Public Studies\GSE8581\Subjects\ UNKNOWN /studies/gse8581/concepts/Subjects FALSE +Unknown \Public Studies\GSE8581\Endpoints\Diagnosis\Unknown\ CATEGORICAL_OPTION /studies/gse8581/concepts/Endpoints/Diagnosis/Unknown FALSE From 68b4316f9e5b17bd506fedda6752994224b30c28 Mon Sep 17 00:00:00 2001 From: Jan Kanis Date: Thu, 15 Sep 2016 12:44:44 +0200 Subject: [PATCH 23/26] Reduce verbosity --- R/RClientConnectionManager.R | 6 +++++- R/createPatientSet.R | 31 ++++++++++++++++--------------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/R/RClientConnectionManager.R b/R/RClientConnectionManager.R index c09af38..cee902a 100644 --- a/R/RClientConnectionManager.R +++ b/R/RClientConnectionManager.R @@ -22,6 +22,10 @@ # You should have received a copy of the GNU General Public License along # with this program. If not, see . +.message <- function(msg) { + if(getOption("verbose")) message(msg) +} + connectToTransmart <- function (transmartDomain, use.authentication = TRUE, token = NULL, .access.token = NULL, ...) { if (!exists("transmartClientEnv") || transmartClientEnv$transmartDomain != transmartDomain) { @@ -171,7 +175,7 @@ function (oauthDomain = transmartClientEnv$transmartDomain, prefetched.request.t # Maybe we're talking to an older version of Transmart that uses the version 1 oauth plugin ping <- .transmartServerGetRequest("/oauth/verify", accept.type = "default", onlyContent = F) } - if (getOption("verbose")) { message(paste(ping$content, collapse = ": ")) } + .message(paste(ping$content, collapse = ": ")) if(ping$status == 200) { return(TRUE) } diff --git a/R/createPatientSet.R b/R/createPatientSet.R index 73b30e6..a211548 100644 --- a/R/createPatientSet.R +++ b/R/createPatientSet.R @@ -21,15 +21,14 @@ # You should have received a copy of the GNU General Public License along # with this program. If not, see . - # Retrieve patient.set ID from tranSMART database, based on the constraints given by the user. # Patient.set constraints are provided as an expression in the shape of, for example, # (c1 | c2) & (c3|c4|c5) & c6 &... where c is either a constraint built up as {concept}{operator}{constraint_value} # (e.g. "age" < 60) or a reference to a concept (e.g. "age") createPatientSet <- function(study.name, patientset.constraints, returnXMLquery = F){ - if(missing(study.name)){stop("Provide study name")} - if(missing(patientset.constraints)){stop("Provide patientset.constraints")} - message("\nProcessing input...", "") + if(missing(study.name)) { stop("Provide study name") } + if(missing(patientset.constraints)) { stop("Provide patientset.constraints") } + .message("\nProcessing input...", "") # retrieve the expression that defines the constraints patientset.constraints <- substitute(patientset.constraints) #needs to be like this, with possible later evaluation @@ -52,10 +51,10 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery xmlQuery <- .buildXMLquery(patientset.constraints, studyConcepts, study.name) hrConstraints <- .makeSummaryOfQuery(xmlQuery) xmlQuery <- saveXML(xmlQuery, prefix = '\n') #convert XML tree to string - if(getOption("verbose")) { message(xmlQuery) } + .message(xmlQuery) # do POST request, and store result - message("\nCreating patient set...", "") + .message("\nCreating patient set...", "") serverResult <- .transmartGetJSON("/patient_sets", post.body = xmlQuery, post.content.type ="text/xml;charset=UTF-8", onlyContent = c(201)) @@ -66,7 +65,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery input_patientset.constraints = .expressionToText(patientset.constraints), finalQueryConstraints = hrConstraints) - message(paste("\nBased on the input, the following constraints were defined and sent to the server", + .message(paste("\nBased on the input, the following constraints were defined and sent to the server", " (always includes study concept):\n", result$finalQueryConstraints, sep = ""), "") if(returnXMLquery){result[["xmlQuery"]] <- xmlQuery} return(result) @@ -89,7 +88,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery patientsetConstraints <- patientsetConstraintsParsed } if(length(patientsetConstraintsParsed) > 1) { - message(paste("\nDetecting a string as input for patient set constraints - expected is an expression,", + .message(paste("\nDetecting a string as input for patient set constraints - expected is an expression,", "such as: \"age\" > 65.", "\nWill attempt to parse the constraints out of the string, converting it", "into an expression...")) @@ -449,13 +448,15 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery tmpValue <- try(eval(constraint$value, envir = globalenv()), silent = T) if(class(tmpValue) == "try-error"){ try_error <- attr(tmpValue, "condition")$message - err_message <- paste(try_error, ". Object was specified in (sub)constraint ", - .expressionToText(patientsetConstraints) , ".\n", sep = "") + err_message <- paste0(try_error, ". Object was specified in (sub)constraint ", + .expressionToText(patientsetConstraints) , ".\n") stop(err_message) } if(length(tmpValue) >1){ - message("\nInput for constraint_value: ") - print(tmpValue) + if(getOption("verbose")) { + message("\nInput for constraint_value: ") + print(tmpValue) + } stop(paste("Incorrect input for constraint_value in (sub)constraint: ", .expressionToText(patientsetConstraints), ".\nObject length of \'", constraint$value , "\' is larger than 1.", "Only a single input value (string/number) is allowed as a constraint_value.")) @@ -619,7 +620,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery result <- try(eval(concept, envir = globalenv()), silent = T) if(class(result) == "try-error"){ try_error <- attr(result, "condition")$message - err_message <- paste(try_error, ". Object was specified in subconstraint ", subconstraint, ".\n", info, sep = "") + err_message <- paste0(try_error, ". Object was specified in subconstraint ", subconstraint, ".\n", info) stop(err_message) } if(length(result) >1){ @@ -683,7 +684,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery } if(is.concept.link){ - message("\nDetecting a concept.link. Will attempt to find matching concept path.") + .message("\nDetecting a concept.link. Will attempt to find matching concept path.") conceptMatch <- grep(concept, studyConcepts$api.link.self.href) if(length(conceptMatch) > 1){ conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, @@ -717,7 +718,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery matched_concept = list(conceptPath = studyConcepts$fullName[conceptMatch], conceptType = studyConcepts$type[conceptMatch]) - message(paste("\nMatched the concept \'", orig_concept, "\' in subconstraint \'", subconstraint, + .message(paste("\nMatched the concept \'", orig_concept, "\' in subconstraint \'", subconstraint, "\'\n to concept (full path): \'", matched_concept$conceptPath, "\'\n", sep = "") ) return(matched_concept) } From c8307198e7489bb15217127fe82c0db4f9cda314 Mon Sep 17 00:00:00 2001 From: Jan Kanis Date: Thu, 15 Sep 2016 15:27:42 +0200 Subject: [PATCH 24/26] Style changes - Change whitespace to conform to the rest of the module - replace paste(..., sep="") by paste0 - some other minor changes --- R/RClientConnectionManager.R | 4 +- R/createPatientSet.R | 356 +++++++++++++++++------------------ tests/run-tests.R | 2 +- 3 files changed, 177 insertions(+), 185 deletions(-) diff --git a/R/RClientConnectionManager.R b/R/RClientConnectionManager.R index cee902a..74d0813 100644 --- a/R/RClientConnectionManager.R +++ b/R/RClientConnectionManager.R @@ -22,8 +22,8 @@ # You should have received a copy of the GNU General Public License along # with this program. If not, see . -.message <- function(msg) { - if(getOption("verbose")) message(msg) +.message <- function(...) { + if(getOption("verbose")) message(...) } connectToTransmart <- diff --git a/R/createPatientSet.R b/R/createPatientSet.R index a211548..c0a1621 100644 --- a/R/createPatientSet.R +++ b/R/createPatientSet.R @@ -73,49 +73,50 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery .checkPatientSetConstraints <- function(patientsetConstraints){ - #test if it is expression and not a string. If string: try to parse + #test if it is an expression and not a string. If string: try to parse if(!is.character(patientsetConstraints)) { return(patientsetConstraints) } - if(length(patientsetConstraints) > 1){ + if(length(patientsetConstraints) > 1) { stop("Incorrect input for patient set constraints. Found multiple strings for defining the patient set constraints. - The patient set constraints should be supplied in one single expression (or string).")} + The patient set constraints should be supplied in one single expression (or string).")} - # TODO: is deze try nodig? - result <- try({patientsetConstraintsParsed <- parse(text = patientsetConstraints)[[1]] - if(length(patientsetConstraintsParsed) == 1 && is.character(patientsetConstraintsParsed)) { + result <- try({ + patientsetConstraintsParsed <- parse(text = patientsetConstraints)[[1]] + if(length(patientsetConstraintsParsed) == 1 && is.character(patientsetConstraintsParsed)) { #e.g. happens if input string is "\"age\"" - patientsetConstraints <- patientsetConstraintsParsed - } - if(length(patientsetConstraintsParsed) > 1) { - .message(paste("\nDetecting a string as input for patient set constraints - expected is an expression,", + patientsetConstraints <- patientsetConstraintsParsed + } + if(length(patientsetConstraintsParsed) > 1) { + .message(paste("\nDetecting a string as input for patient set constraints - expected is an expression,", "such as: \"age\" > 65.", "\nWill attempt to parse the constraints out of the string, converting it", "into an expression...")) - patientsetConstraints <- patientsetConstraintsParsed - } + patientsetConstraints <- patientsetConstraintsParsed + } }, silent = T) - if(class(result) == "try-error"){ - errorText <- paste("Detected a string as input for patient set constraints. Have tried to parse the", - "constraints out of the string to convert it into an expression, but the attempt to", - "parse the constraints out of the string failed:\n\n", - result[1], "\nPlease check the format of your input.", - "Type ?createPatientSet for more details on the expected format.") - stop(errorText) + + if(class(result) == "try-error") { + stop(paste("Detected a string as input for patient set constraints. Have tried to parse the", + "constraints out of the string to convert it into an expression, but the attempt to", + "parse the constraints out of the string failed:\n\n", + result[1], "\nPlease check the format of your input.", + "Type ?createPatientSet for more details on the expected format.")) } + return(patientsetConstraints) } # parse the constraints, and turn it into a query in XML format -.buildXMLquery <- function(patientset.constraints, studyConcepts, study.name){ +.buildXMLquery <- function(patientset.constraints, studyConcepts, study.name) { ## parse the expression containing the constraints and translate this into a query definition in XML format parsedConstraintsXMLlist <- .parsePatientSetConstraints(patientset.constraints, studyConcepts) # parsePatientSetConstraints returns a list with XML trees, these trees all either have items as top XMLnodes or # panels. If the top nodes of the trees are items, add these items to a panel node and add this new node to a list. - if(xmlName(parsedConstraintsXMLlist[[1]]) == "item"){ + if(xmlName(parsedConstraintsXMLlist[[1]]) == "item") { parsedConstraintsXMLlist <- .makePanelList(parsedConstraintsXMLlist) } @@ -124,7 +125,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # build XML formatted query xmlQuery <- xmlNode("qd:query_definition", namespaceDefinitions = c(qd="http://www.i2b2.org/xsd/cell/crc/psm/1.1/")) - for(i in 1:length(parsedConstraintsXMLlist)){ + for(i in 1:length(parsedConstraintsXMLlist)) { xmlQuery <- append.XMLNode(xmlQuery, parsedConstraintsXMLlist[[i]]) } return(xmlQuery) @@ -133,10 +134,10 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # determine for each concept in the concept table whether a concept is an end leaf of the tree, ie. if it is a data node # (which can be either a numeric, categorical or highdim node) -.findEndLeaves <- function(conceptListStudy){ +.findEndLeaves <- function(conceptListStudy) { conceptTypes <- unique(conceptListStudy$type) - if( any(! conceptTypes %in% c("CATEGORICAL_OPTION", "NUMERIC", "UNKNOWN", "HIGH_DIMENSIONAL"))){ + if( any(! conceptTypes %in% c("CATEGORICAL_OPTION", "NUMERIC", "UNKNOWN", "HIGH_DIMENSIONAL"))) { warning("Unexpected concept type for one or more concepts in the selected study. Determination which concepts are end-leaves of the tree might not work correcty in all cases. This only affects the patient selection query if concepts with undetermined type are included in the query. @@ -161,8 +162,8 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery categoricalNodes <- unique(categoricalNodes) conceptListStudy$endLeaf[conceptListStudy$type == "UNKNOWN" & conceptListStudy$fullName %in% categoricalNodes] <- T - conceptListStudy$type[conceptListStudy$type == "UNKNOWN" & conceptListStudy$fullName %in% categoricalNodes] <- "CATEGORICAL_NODE" conceptListStudy$endLeaf[conceptListStudy$type == "UNKNOWN" & !conceptListStudy$fullName %in% categoricalNodes] <- F + conceptListStudy$type[conceptListStudy$type == "UNKNOWN" & conceptListStudy$fullName %in% categoricalNodes] <- "CATEGORICAL_NODE" return(conceptListStudy) } @@ -170,7 +171,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # parsePatientSetConstraints takes an expression defining the constraints for the patientset and returns # either a list of item XMLtrees or list of panel XMLtrees -.parsePatientSetConstraints <- function(patientsetConstraints, studyConcepts){ +.parsePatientSetConstraints <- function(patientsetConstraints, studyConcepts) { relationalOperators <- c("<", ">", "<=",">=", "==", "!=") logicalOperators <- c("&","&&", "|", "||") @@ -180,9 +181,9 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # construct a message that's used later on, when an error occurs. This message includes a listing of the different # elements (sub units) of the constraint expression, if verbose == T elementsMsg <- "" - if(verbose){ + if(verbose) { subUnits <- "" - for(i in 1:length(patientsetConstraints)){ + for(i in 1:length(patientsetConstraints)) { subUnits <- paste(subUnits, paste("\n\tElement ", i,": ", .expressionToText(patientsetConstraints[[i]]), sep = "")) } elementsMsg <- paste("\nElements of the (sub)constraint after parsing", subUnits,sep = "") @@ -200,14 +201,14 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # data.frame$firstColumn[firstRow],etc - if(is.symbol(patientsetConstraints)){ + if(is.symbol(patientsetConstraints)) { firstElement_in_allowedOperators <- F - }else{ + } else { firstElement <- as.character(patientsetConstraints[[1]]) firstElement_in_allowedOperators <- firstElement%in% allowedOperators } - if(length(patientsetConstraints) == 3 & firstElement_in_allowedOperators){ + if(length(patientsetConstraints) == 3 & firstElement_in_allowedOperators) { constraintOperator <- firstElement constraint <- list() @@ -219,10 +220,10 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # concept has to satisfy to: # [[1]] contains a relational operator, [[2]] the concept, [[3]] the constraint value is.singleConstraint <- constraintOperator %in% relationalOperators - if(is.singleConstraint){ + if(is.singleConstraint) { itemXMLlist <- list(.parseSingleConstraint(patientsetConstraints, studyConcepts)) return(itemXMLlist) - }else{ + } else { # it's a concatenation of constraints: call function again on the subconstraints. # right now it only supports the format where the & operators are always the highest level operators and the # | operators are only used as lowest level, forcing the format: (c1|c2)&c3&(c4|c5|c6|...)& ... @@ -233,7 +234,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # if there is an "OR" operation inbetween two subconstraints, the combination of those two subconstraints # cannot have an & anymore (this is for forcing the strict format for constraint definition described above) if(constraintOperator == "|" ) { - if(grepl("&", .expressionToText(patientsetConstraints))){ + if(grepl("&", .expressionToText(patientsetConstraints))) { stop(paste("Wrong format of (sub)constraint definition. Found in (sub)constraint: ", .expressionToText(patientsetConstraints), "\nRight now the only format supported for defining patientset constraints is one where the & ", @@ -251,30 +252,30 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # treeBeforeOperator/treeAfterOperator can be either a list of items or a list of panels # if it contains a list of items: add the items of that list to a panel node - if(constraintOperator == "&"){ - if(xmlName(treeBeforeOperator[[1]]) == "item"){ + if(constraintOperator == "&") { + if(xmlName(treeBeforeOperator[[1]]) == "item") { beforePanels <- .makePanelList(treeBeforeOperator) } - if(xmlName(treeBeforeOperator[[1]]) == "panel"){ - beforePanels <- treeBeforeOperator + if(xmlName(treeBeforeOperator[[1]]) == "panel") { + beforePanels <- treeBeforeOperator } - if(xmlName(treeAfterOperator[[1]]) == "item"){ + if(xmlName(treeAfterOperator[[1]]) == "item") { afterPanels <- .makePanelList(treeAfterOperator) } - if(xmlName(treeAfterOperator[[1]]) == "panel"){ - afterPanels <- treeAfterOperator + if(xmlName(treeAfterOperator[[1]]) == "panel") { + afterPanels <- treeAfterOperator } panelList <- c(beforePanels, afterPanels) return(panelList) } } - }else if(class(patientsetConstraints) == "("){ + } else if(class(patientsetConstraints) == "(") { # expression is surrounded by brackets: take expression between brackets and call function again # element [[2]] contains the expression between the brackets, element [[1]] is '(' xmlTreeList <- .parsePatientSetConstraints(patientsetConstraints[[2]], studyConcepts) return(xmlTreeList) - }else if(length(patientsetConstraints) == 1 & is.character(patientsetConstraints)) { + } else if(length(patientsetConstraints) == 1 & is.character(patientsetConstraints)) { # Then the (sub)constraint should consist of only a specification of a concept. # This will result in selection of all patients that have a value for this concept. # Concept specification can be a string containing a pattern to match to the concept name or a concept path or link, @@ -285,7 +286,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # make itemTree for that concept itemXMLlist <- list(xmlNode("item", xmlNode("item_key", .makeItemKey(conceptPath)))) return(itemXMLlist) - }else if(class(patientsetConstraints) == "name" | class(patientsetConstraints) == "call"){ + } else if(class(patientsetConstraints) == "name" | class(patientsetConstraints) == "call") { # alternatively it is an expression containing a call to substite or an object (with or without index), # e.g. variables[1] or variable, data.frame$firstColumn[firstRow],etc - this object can # contain a string specifying a concept, or a string that in itself is a constraint definition, or an @@ -296,20 +297,19 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # tmp <- c("\"age\" <65", "\"sex\"== \"female\""); createPatientSet("Some study", tmp[1]) # try to evaluate the expression and find a matching constraint concept result <- try(eval(patientsetConstraints, envir = globalenv()), silent = T) - if(class(result) == "try-error"){ + if(class(result) == "try-error") { stop(paste(attr(result, "condition")$message, "\n",errorMsg, sep = "")) } - if(is.list(result)) - { - if(length(result) > 1){ + if(is.list(result)) { + if(length(result) > 1) { stop(paste("Incorrect input for patient set constraints.\n", "Evaluation of input", .expressionToText(patientsetConstraints), "results in a list with more than one element", "while the function expects only a single string or a single expression", "(as created with function substitute), not multiple.")) } - if(length(result) == 1){ + if(length(result) == 1) { warning(paste("Evaluation of input", .expressionToText(patientsetConstraints), "results in a list with a single element.", "Expected is a string or an expression (as created with function substitute).", @@ -317,10 +317,8 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery result <- result[[1]] } } - if(length(result)==1) - { - if(is.na(result)) - { + if(length(result)==1) { + if(is.na(result)) { stop(paste("Content of \'",.expressionToText(patientsetConstraints), "\' is 'NA'.", " Cannot use 'NA' as constraint definition/concept specification.", sep = "")) } @@ -330,7 +328,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery xmlTreeList <- .parsePatientSetConstraints(patientsetConstraints, studyConcepts) return(xmlTreeList) - }else{ + } else { stop(errorMsg) } } @@ -338,10 +336,10 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # the deparse function converts expressions to strings. However it cuts the strings of at a certain bytelength, # so a long expression could result in a character vector with several portions of the original expression # this function makes one string out of the vector again -.expressionToText <- function(expression){ +.expressionToText <- function(expression) { textExpression <- deparse(expression, width.cutoff = 500) - if(length(textExpression)>1){ + if(length(textExpression)>1) { textExpressionPasted <- gsub("^[[:blank:]]+", "", textExpression) textExpressionPasted <- paste(textExpressionPasted, collapse = "") @@ -355,27 +353,25 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery } -.makeSummaryOfQuery <- function(xmlQuery){ +.makeSummaryOfQuery <- function(xmlQuery) { parsedXML <- "" - - # TODO: this test is not really needed panels <- xmlChildren(xmlQuery) - for(i in 1:length(panels)){ + for(i in 1:length(panels)) { panel <- panels[[i]] - if(i == 1){parsedXML <- paste(parsedXML, "( ", sep = "") #first panel - }else{parsedXML <- paste(parsedXML, "\n\n\t&\n\n( ", sep = "")} + if(i == 1) {parsedXML <- paste(parsedXML, "( ", sep = "") } #first panel + else { parsedXML <- paste(parsedXML, "\n\n\t&\n\n( ", sep = "") } invert <- xmlValue(panel[["invert"]]) - if(invert == "1"){parsedXML <- paste(parsedXML, "!( ", sep = "") } + if(invert == "1") { parsedXML <- paste(parsedXML, "!( ", sep = "") } #add the children items <- xmlElementsByTagName(panel, "item") - for(j in 1:length(items)){ + for(j in 1:length(items)) { item <- items[[j]] - if(j > 1){parsedXML <- paste(parsedXML, " | ", sep = "")} + if(j > 1) { parsedXML <- paste(parsedXML, " | ", sep = "") } #get concept path item_key <- xmlValue(item[["item_key"]]) @@ -385,28 +381,27 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery #if constraint operator and constraint value are given, get these childNames <- names(item) - if("constrain_by_value" %in% childNames){ + if("constrain_by_value" %in% childNames) { valueConstraints <- item[["constrain_by_value"]] valueOperator <- xmlValue(valueConstraints[["value_operator"]]) - parsedXML <- paste(parsedXML, " ", valueOperator, " ", sep = "") + parsedXML <- paste(parsedXML, " ", valueOperator, " ", sep = "") valueConstraint <- xmlValue(valueConstraints[["value_constraint"]]) parsedXML <- paste(parsedXML, " ", valueConstraint, " ", sep = "") - } } #close brackets for panel - if(invert == "1"){parsedXML <- paste(parsedXML, " ))", sep = "") - }else{parsedXML <- paste(parsedXML, " )", sep = "") } + if(invert == "1") { parsedXML <- paste(parsedXML, " ))", sep = "") } + else { parsedXML <- paste(parsedXML, " )", sep = "") } } - if(parsedXML == ""){warning("Something went wrong with making a human readable version of the XML. - This does not affect the formation of the patient set")} + if(parsedXML == "") { warning("Something went wrong with making a human readable version of the XML. + This does not affect the formation of the patient set") } return(parsedXML) } #just needs one conceptPath, can be of any of the concepts in the study. It can be any path in column 'fullName' -.addStudyPanel <- function (constraintXMLlist, study.name, conceptPath){ +.addStudyPanel <- function (constraintXMLlist, study.name, conceptPath) { # retrieve the path for the study concept, by taking only the first part of the supplied concept path up to and # including the study.name. # e.g. take "\\Public Studies\\GSE8581\\" from "\\Public Studies\\GSE8581\\Subjects\\Ethnicity\\Afro American\\" @@ -427,16 +422,16 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # it expects a list of "item" XML trees. It will add all items to a panel XML node, # and returns that node as part of a list -.makePanelList <- function(itemXMLtreeList){ +.makePanelList <- function(itemXMLtreeList) { panel <- xmlNode("panel", xmlNode("invert", 0)) - for(i in 1:length(itemXMLtreeList)){ + for(i in 1:length(itemXMLtreeList)) { panel<- append.XMLNode(panel, itemXMLtreeList[[i]]) } return(list(panel)) } # constraint is of format: {concept definition}{relational operator}{constraint_value}, e.g. "age" < 12. -.parseSingleConstraint <- function(patientsetConstraints, studyConcepts){ +.parseSingleConstraint <- function(patientsetConstraints, studyConcepts) { constraint <- list() # grab the different elements of the constraint definition @@ -444,26 +439,25 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery constraint$concept <- patientsetConstraints[[2]] constraint$value <- patientsetConstraints[[3]] - if(class(constraint$value) == "name" | class(constraint$value) == "call"){ + if(class(constraint$value) == "name" | class(constraint$value) == "call") { tmpValue <- try(eval(constraint$value, envir = globalenv()), silent = T) - if(class(tmpValue) == "try-error"){ + if(class(tmpValue) == "try-error") { try_error <- attr(tmpValue, "condition")$message - err_message <- paste0(try_error, ". Object was specified in (sub)constraint ", - .expressionToText(patientsetConstraints) , ".\n") - stop(err_message) + stop(paste0(try_error, ". Object was specified in (sub)constraint ", + .expressionToText(patientsetConstraints) , ".\n")) } - if(length(tmpValue) >1){ + if(length(tmpValue) >1) { if(getOption("verbose")) { message("\nInput for constraint_value: ") print(tmpValue) } - stop(paste("Incorrect input for constraint_value in (sub)constraint: ", .expressionToText(patientsetConstraints), - ".\nObject length of \'", constraint$value , "\' is larger than 1.", - "Only a single input value (string/number) is allowed as a constraint_value.")) + stop(paste0("Incorrect input for constraint_value in (sub)constraint: ", .expressionToText(patientsetConstraints), + ".\nObject length of \'", constraint$value , "\' is larger than 1. ", + "Only a single input value (string/number) is allowed as a constraint_value.")) } - if(is.na(tmpValue)){ - stop(paste("Content of \'",.expressionToText(constraint$value), "\' is 'NA'.", - " A constraint value cannot be a missing value.", sep = "")) + if(is.na(tmpValue)) { + stop(paste0("Content of \'",.expressionToText(constraint$value), "\' is 'NA'.", + " A constraint value cannot be a missing value.")) } constraint$value <- tmpValue } @@ -477,9 +471,9 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery constraint$value_operator <- NA constraint$value_type <- NA - if(constraint$conceptType == "NUMERIC"){ + if(constraint$conceptType == "NUMERIC") { #check if the supplied constraint value is numeric - if(!is.numeric(constraint$value)){ + if(!is.numeric(constraint$value)) { stop(paste("The supplied constraint value ", deparse(constraint$value)," is not numerical, while concept ", constraint$conceptPath, " is a numerical concept. (This was the concept selected based on the input: \'", constraint$concept, "\'). \nEncountered in (sub)constraint: ",.expressionToText(patientsetConstraints), @@ -505,7 +499,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery if(constraint$conceptType == "CATEGORICAL_NODE" ){ #check if supplied constraint value is character - if(!is.character(constraint$value)){ + if(!is.character(constraint$value)) { warning(paste("The supplied constraint value ", constraint$value," is not of class \'character\', while concept ", constraint$conceptPath, " is a categorical concept (ie. containing text).", "\n(This was the concept selected based on the input: \'", @@ -520,11 +514,11 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery constraintValuePath <- .getConstraintConcept(constraint$value, patientsetConstraints, studyConcepts, identical.match = T, testIfEndLeave = F)[["conceptPath"]] if(constraintValuePath != paste(constraint$conceptPath, constraint$value, "\\", sep = "")){ - stop(paste("Incorrect (sub)constraint definition for (sub)constraint:\'", .expressionToText(patientsetConstraints), - "\'.", "\nThe constraint value \'", constraint$value,"\' does not seem to be an existing value ", - "of the categorical concept \'", constraint$concept, "\'.", - "\nConcept path: ", constraint$conceptPath,"\nPath to contstraint value: ", - constraintValuePath, sep= "")) + stop(paste0("Incorrect (sub)constraint definition for (sub)constraint:\'", .expressionToText(patientsetConstraints), + "\'.", "\nThe constraint value \'", constraint$value,"\' does not seem to be an existing value ", + "of the categorical concept \'", constraint$concept, "\'.", + "\nConcept path: ", constraint$conceptPath,"\nPath to contstraint value: ", + constraintValuePath)) } #translate relational operator from R to a value operator that can be used in the query @@ -532,10 +526,10 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery constraint$value_operator <- .getValueOperator(constraint$operator, "CATEGORICAL_NODE") # construct the "item" subtree for the current constraint - if(constraint$value_operator == "EQ"){ + if(constraint$value_operator == "EQ") { itemXMLtree <- xmlNode("item", xmlNode("item_key", .makeItemKey(constraintValuePath))) } - if(constraint$value_operator == "NE"){ + if(constraint$value_operator == "NE") { stop("For now the '!=' operation is not supported for categorical values") ##implement later? So that if you specify conceptX != A then it automatically selects all possible categorical # values in conceptX, except A. (you can't just use invert=1, for example trial_group != control | x < 1) @@ -543,20 +537,19 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery } } - if(constraint$conceptType == "HIGH_DIMENSIONAL"){ + if(constraint$conceptType == "HIGH_DIMENSIONAL") { # you cannot apply relational operations to the high dimensional node - stop(paste("Incorrect use of a high dimensional data node in (sub)constraint: ", - .expressionToText(patientsetConstraints),".", - "\nHigh dimensional nodes can only be used for defining patient sets by supplying the node name ", - "alone (e.g. \"mRNA day1\"); it is not possible to apply a relational operation (such as \"mRNA day1 < 0\")", - " to the node. \nWhen a high dimensional node name is supplied, ", - "all patients that have data for that high dimensional node will be selected.", sep = "")) + stop(paste0("Incorrect use of a high dimensional data node in (sub)constraint: ", + .expressionToText(patientsetConstraints),".", + "\nHigh dimensional nodes can only be used for defining patient sets by supplying the node name ", + "alone (e.g. \"mRNA day1\"); it is not possible to apply a relational operation (such as \"mRNA day1 < 0\")", + " to the node. \nWhen a high dimensional node name is supplied, ", + "all patients that have data for that high dimensional node will be selected.")) } - if(is.na(constraint$value_operator)){ - stop(paste("Could not determine which value_operator to use in the query definition for the constraint \'", - .expressionToText(patientsetConstraints), "\'. Operator supplied by user: ", constraint$operator, - sep = "" )) + if(is.na(constraint$value_operator)) { + stop(paste0("Could not determine which value_operator to use in the query definition for the constraint \'", + .expressionToText(patientsetConstraints), "\'. Operator supplied by user: ", constraint$operator)) } return(itemXMLtree) @@ -567,7 +560,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # expected format item key:\\Dimension\concept_path. Examples: # \\Public Studies\Public Studies\Cell-line\Demographics\Age\ # \\Private Studies\Private Studies\Cell-line\Characteristics\Age\ -.makeItemKey <- function(conceptPath){ +.makeItemKey <- function(conceptPath) { dimension <- strsplit(conceptPath, "\\", fixed=T)[[1]][2] #get first part of the concept path, that is either public or private study # TODO: is dit nodig? @@ -579,8 +572,8 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery #translate relational operators to a text representation as is expected for the query -.getValueOperator <- function(operator, type){ - if(type == "NUMERIC"){ +.getValueOperator <- function(operator, type) { + if(type == "NUMERIC") { if(operator == "<"){return("LT")} if(operator == "<="){return("LE")} if(operator == ">"){return("GT")} @@ -589,22 +582,22 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery if(operator == "!="){return("NE")} } - if(type == "CATEGORICAL_NODE"){ - if(operator %in% c("<", "<=", ">", ">=")){ - stop(paste("The operation \'", operator, "\' is not supported for text variables.", sep = ""))} + if(type == "CATEGORICAL_NODE") { + if(operator %in% c("<", "<=", ">", ">=")) { + stop(paste0("The operation \'", operator, "\' is not supported for text variables.")) } if(operator == "=="){return("EQ")} if(operator == "!="){return("NE")} } #if the function did not return yet, something went wrong. - stop(paste("Something went wrong with determining the value_operator to use for the query definition. Operator:", - operator,". Value type: ", type, sep = "")) + stop(paste0("Something went wrong with determining the value_operator to use for the query definition. Operator:", + operator,". Value type: ", type)) } # find the concept path for a given concept definition. Concept can be specified as pattern matching a # concept name, or as a partial/full concept path or link -.getConstraintConcept <- function(concept, subconstraint, studyConcepts, identical.match = F, testIfEndLeave = T){ +.getConstraintConcept <- function(concept, subconstraint, studyConcepts, identical.match = F, testIfEndLeave = T) { info <- "Correct way to supply a concept (as part of a (sub)constraint) is: either directly as a string, containing the concept name or path, or indirectly as an object (variable) that contains a string with the concept name or path. @@ -616,29 +609,28 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery subconstraint <- .expressionToText(subconstraint) #if not string: get the value of the variable/object. Value should be one string. - if(class(concept) == "name" | class(concept) == "call"){ + if(class(concept) == "name" | class(concept) == "call") { result <- try(eval(concept, envir = globalenv()), silent = T) - if(class(result) == "try-error"){ + if(class(result) == "try-error") { try_error <- attr(result, "condition")$message - err_message <- paste0(try_error, ". Object was specified in subconstraint ", subconstraint, ".\n", info) - stop(err_message) + stop(paste0(try_error, ". Object was specified in subconstraint ", subconstraint, ".\n", info)) } - if(length(result) >1){ + if(length(result) >1) { write(paste("The content of object: \'", .expressionToText(concept), "\' is:", sep = "" ),"") print(result) - stop(paste("Incorrect input for concept specification in subconstraint: ", subconstraint, - ".\nObject length of \'", .expressionToText(concept), - "\' is larger than 1. Only a single string is allowed for specifying the concept.", - "The content of this variable is printed above this error message.", sep = "")) + stop(paste0("Incorrect input for concept specification in subconstraint: ", subconstraint, + ".\nObject length of \'", .expressionToText(concept), + "\' is larger than 1. Only a single string is allowed for specifying the concept.", + "The content of this variable is printed above this error message.")) } - if(is.na(result)){ + if(is.na(result)) { stop(paste("Content of \'",.expressionToText(concept), "\' is 'NA'.", " Cannot use 'NA' as concept specification.", sep = "")) } concept <- result } #concept should be a string. - if(!is.character(concept)){ + if(!is.character(concept)) { stop(paste("Incorrect input for concept specification in subconstraint: ", subconstraint, ".\n", info, sep = "")) } @@ -652,41 +644,41 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery is.concept.path <- grepl("\\\\", concept) conceptMatch <- character(0) - if(!is.concept.path){ + if(!is.concept.path) { #concept paths are in 'fullName' column of getConcepts result conceptMatch <- grep(concept, studyConcepts$name, ignore.case = !identical.match) - if(length(conceptMatch) > 1){ + if(length(conceptMatch) > 1) { conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, concept_list = studyConcepts$name) } } - if(length(conceptMatch) == 0){ + if(length(conceptMatch) == 0) { # supplied concept migth be concept path or link. is.concept.link <- grepl("^/studies/.+/concepts/", concept) | grepl("^studies/.+/concepts/", concept) - if(is.concept.path & is.concept.link){stop( - paste( "Something went wrong with detecting whether the provided string \'", concept, - "\' is a concept path or concept link. Please check if the provided string is correct.", - "\nTo check this, you can look at the resulting data.frame of getConcepts(YOUR_STUDY_NAME).", - "\nThe concept paths that can be used for this study can be found in the \'fullName\' column,", - "and the concept links in the \'api.link.self.href\' column", - "If the string does have the correct format, you may have encountered a bug.", - "\nYou can help fix it by contacting us. Type ?transmartRClient for contact details.", sep = "")) + if(is.concept.path & is.concept.link) { + stop(paste0("Something went wrong with detecting whether the provided string \'", concept, + "\' is a concept path or concept link. Please check if the provided string is correct.", + "\nTo check this, you can look at the resulting data.frame of getConcepts(YOUR_STUDY_NAME).", + "\nThe concept paths that can be used for this study can be found in the \'fullName\' column,", + "and the concept links in the \'api.link.self.href\' column", + "If the string does have the correct format, you may have encountered a bug.", + "\nYou can help fix it by contacting us. Type ?transmartRClient for contact details.")) } - if(is.concept.path){ + if(is.concept.path) { conceptMatch <- grep(concept, studyConcepts$fullName, fixed = T) - if(length(conceptMatch) > 1){ + if(length(conceptMatch) > 1) { conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, concept_list = studyConcepts$fullName) } } - if(is.concept.link){ + if(is.concept.link) { .message("\nDetecting a concept.link. Will attempt to find matching concept path.") conceptMatch <- grep(concept, studyConcepts$api.link.self.href) - if(length(conceptMatch) > 1){ + if(length(conceptMatch) > 1) { conceptMatch <- .selectMatch(concept = concept, matching_indices = conceptMatch, concept_list = studyConcepts$api.link.self.href) } @@ -694,15 +686,15 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery } identicalM <- "" - if(identical.match){identicalM <- "identical(literal) "} - if(length(conceptMatch) == 0){ - stop(paste("No ", identicalM, "matching concept or categorical value found in this study for \'", - orig_concept, "\', found in subconstraint: ", subconstraint, - "\nNote: The supplied concept in the constraint definition can be a full or partial ", - "match to the concept name, and can even contain regular expressions (pattern matching will be done as", - " done in the grep function, ignoring case) or it can be a concept.link or a concept.path.", - "\nIn case of a categorical concept; the value part of the constraint has to be a literal match to one", - " of the possible categorical values for that concept." , sep = "")) + if(identical.match) { identicalM <- "identical(literal) "} + if(length(conceptMatch) == 0) { + stop(paste0("No ", identicalM, "matching concept or categorical value found in this study for \'", + orig_concept, "\', found in subconstraint: ", subconstraint, + "\nNote: The supplied concept in the constraint definition can be a full or partial ", + "match to the concept name, and can even contain regular expressions (pattern matching will be done as", + " done in the grep function, ignoring case) or it can be a concept.link or a concept.path.", + "\nIn case of a categorical concept; the value part of the constraint has to be a literal match to one", + " of the possible categorical values for that concept.")) } #test if matches are endLeaves, ie. a data node. @@ -710,54 +702,54 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # (ie. data node), either categorical or numerical, and if it's categorical it should be an end leave and not a # categorical value. If only a concept is supplied as a constraint, it is possible to also use other concepts that # are not end leaves, and high dimensional data nodes - in that case testIfEndLeave should be FALSE. - if(!studyConcepts$endLeaf[[conceptMatch]] & testIfEndLeave){ - stop(paste("The supplied concept \'", concept, "\' is not a data node (ie. not an end leaf of the transmart tree).", - "The supplied concept name/path/link must point to a single numerical or categorical", - " data node (end leaf).", sep = "")) + if(!studyConcepts$endLeaf[[conceptMatch]] & testIfEndLeave) { + stop(paste0("The supplied concept \'", concept, "\' is not a data node (ie. not an end leaf of the transmart tree).", + "The supplied concept name/path/link must point to a single numerical or categorical", + " data node (end leaf).")) } matched_concept = list(conceptPath = studyConcepts$fullName[conceptMatch], conceptType = studyConcepts$type[conceptMatch]) - .message(paste("\nMatched the concept \'", orig_concept, "\' in subconstraint \'", subconstraint, - "\'\n to concept (full path): \'", matched_concept$conceptPath, "\'\n", sep = "") ) + .message(paste0("\nMatched the concept \'", orig_concept, "\' in subconstraint \'", subconstraint, + "\'\n to concept (full path): \'", matched_concept$conceptPath, "\'\n") ) return(matched_concept) } #called by .getConstraintConcept if there were initially multiple matches found for the concept, using the 'grep' function -.selectMatch <- function(concept, matching_indices, concept_list){ +.selectMatch <- function(concept, matching_indices, concept_list) { #any literal, full length matches? (ignoring case) literalMatches <- tolower(concept_list[matching_indices]) == tolower(concept) - if(any(literalMatches)){ + if(any(literalMatches)) { matching_indices <- matching_indices[literalMatches] - if(length(matching_indices) > 1){ - stop(paste("There seem to be more than one concepts with the name \'", concept, "\'.", - "\nPlease use the concept path instead of the concept name to specify the concept.", - "(Hint: Concept paths can be found in the \'fullName\' column of the getConcepts() result).", sep = "")) + if(length(matching_indices) > 1) { + stop(paste0("There seem to be more than one concepts with the name \'", concept, "\'.", + "\nPlease use the concept path instead of the concept name to specify the concept.", + "(Hint: Concept paths can be found in the \'fullName\' column of the getConcepts() result).")) } - message(paste("\nMultiple matching concepts found for the string \'", concept, - "\'. One identical match was found (ignoring case): \'", - concept_list[matching_indices], "\'.\nThis match is selected.", - "\nFor more precise matching use full-length concept names, paths, or links,", - " and/or include beginning/end of string symbols (^/$) - see ?regexp.", - "Note: regexp can only be used for specifying concept names or links, not paths",sep = "")) + message(paste0("\nMultiple matching concepts found for the string \'", concept, + "\'. One identical match was found (ignoring case): \'", + concept_list[matching_indices], "\'.\nThis match is selected.", + "\nFor more precise matching use full-length concept names, paths, or links,", + " and/or include beginning/end of string symbols (^/$) - see ?regexp.", + "Note: regexp can only be used for specifying concept names or links, not paths")) } #if not literal match take the shortest match - if(!any(literalMatches)){ + if(!any(literalMatches)) { paths_tmp<- concept_list[matching_indices] shortest_match<- matching_indices[which.min(nchar(paths_tmp))] matching_indices<- shortest_match - message(paste("\nMultiple matching concepts found for the string \'", concept,"\', selecting shortest match: \'", - paste(concept_list[shortest_match], collapse = ","), "\'.", - "\nFor more precise matching use full-length names or paths,", - " and/or include beginning/end of string symbols (^/$) - see ?regexp", sep = "")) - if(length(matching_indices) > 1){ - stop(paste("There are multiple shortest matches for \'", concept, "\'. Matches: ", - paste(concept_list[shortest_match], collapse = ", "), ".", - "\nPlease use a more specific/longer string for specifying the concept name or path,", - "or use the (full) concept path instead of the concept name to specify the concept.", - "(Hint: Concept paths can be found in the \'fullName\' column of the getConcepts() result).", sep = "")) + message(paste0("\nMultiple matching concepts found for the string \'", concept,"\', selecting shortest match: \'", + paste(concept_list[shortest_match], collapse = ","), "\'.", + "\nFor more precise matching use full-length names or paths,", + " and/or include beginning/end of string symbols (^/$) - see ?regexp")) + if(length(matching_indices) > 1) { + stop(paste0("There are multiple shortest matches for \'", concept, "\'. Matches: ", + paste(concept_list[shortest_match], collapse = ", "), ".", + "\nPlease use a more specific/longer string for specifying the concept name or path,", + "or use the (full) concept path instead of the concept name to specify the concept.", + "(Hint: Concept paths can be found in the \'fullName\' column of the getConcepts() result).")) } } return(matching_indices) diff --git a/tests/run-tests.R b/tests/run-tests.R index eb01f7f..e532a52 100644 --- a/tests/run-tests.R +++ b/tests/run-tests.R @@ -30,7 +30,7 @@ require("transmartRClient") unittestsLocation <- system.file("unittests", package="transmartRClient") -test.suite <- defineTestSuite("highdimTests", +test.suite <- defineTestSuite("RInterface tests", dirs = unittestsLocation, testFileRegexp = "^runit.+\\.[rR]$") From ecf742a08090c605f35e5406e4ec71f9eebacdac Mon Sep 17 00:00:00 2001 From: Jan Kanis Date: Thu, 15 Sep 2016 17:07:46 +0200 Subject: [PATCH 25/26] Fix another bug --- R/createPatientSet.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/createPatientSet.R b/R/createPatientSet.R index c0a1621..d1e9844 100644 --- a/R/createPatientSet.R +++ b/R/createPatientSet.R @@ -148,12 +148,12 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery # concepts with type numeric and high_dimensional are end-leaves, # concepts with type categorical_options are not end-leaves - endLeaf <- logical() + endLeaf <- NA conceptListStudy <- cbind(conceptListStudy, endLeaf, stringsAsFactors = F) conceptListStudy$endLeaf[conceptListStudy$type %in% c("NUMERIC", "HIGH_DIMENSIONAL")] <- T conceptListStudy$endLeaf[conceptListStudy$type == "CATEGORICAL_OPTION"] <- F - #find categorical data nodes, and set type of categorical end-leave (data node) to "CATEGORICAL_NODE" + # find categorical data nodes, and set type of categorical end-leave (data node) to "CATEGORICAL_NODE" # concepts with 'type' categorical_option are the concept values. Take the concept path of the concept values and # remove the last part to retrieve a list of concept paths for categorical nodes. categoricalOptionsPaths <- conceptListStudy$fullName[conceptListStudy$type == "CATEGORICAL_OPTION"] From 355d9bd76b778982e8835f764cdffa680912b2a5 Mon Sep 17 00:00:00 2001 From: Jan Kanis Date: Thu, 15 Sep 2016 19:03:05 +0200 Subject: [PATCH 26/26] more refactorings --- R/createPatientSet.R | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/R/createPatientSet.R b/R/createPatientSet.R index d1e9844..d8053c9 100644 --- a/R/createPatientSet.R +++ b/R/createPatientSet.R @@ -81,21 +81,8 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery stop("Incorrect input for patient set constraints. Found multiple strings for defining the patient set constraints. The patient set constraints should be supplied in one single expression (or string).")} - result <- try({ - patientsetConstraintsParsed <- parse(text = patientsetConstraints)[[1]] - if(length(patientsetConstraintsParsed) == 1 && is.character(patientsetConstraintsParsed)) { - #e.g. happens if input string is "\"age\"" - patientsetConstraints <- patientsetConstraintsParsed - } - if(length(patientsetConstraintsParsed) > 1) { - .message(paste("\nDetecting a string as input for patient set constraints - expected is an expression,", - "such as: \"age\" > 65.", - "\nWill attempt to parse the constraints out of the string, converting it", - "into an expression...")) - patientsetConstraints <- patientsetConstraintsParsed - } - }, silent = T) - + patientsetConstraintsParsed <- NA + result <- try({ patientsetConstraintsParsed <- parse(text = patientsetConstraints)[[1]] }, silent = T) if(class(result) == "try-error") { stop(paste("Detected a string as input for patient set constraints. Have tried to parse the", "constraints out of the string to convert it into an expression, but the attempt to", @@ -104,6 +91,19 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery "Type ?createPatientSet for more details on the expected format.")) } + if(length(patientsetConstraintsParsed) == 1 && is.character(patientsetConstraintsParsed)) { + #e.g. happens if input string is "\"age\"" + return(patientsetConstraintsParsed) + } + if(length(patientsetConstraintsParsed) > 1) { + .message(paste("\nDetecting a string as input for patient set constraints - expected is an expression,", + "such as: \"age\" > 65.", + "\nWill attempt to parse the constraints out of the string, converting it", + "into an expression...")) + return(patientsetConstraintsParsed) + } + + # if the input is already a concept name, e.g. "age" return(patientsetConstraints) } @@ -177,11 +177,10 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery logicalOperators <- c("&","&&", "|", "||") allowedOperators <- c(relationalOperators, logicalOperators) - verbose <- getOption("verbose") # construct a message that's used later on, when an error occurs. This message includes a listing of the different - # elements (sub units) of the constraint expression, if verbose == T + # elements (sub units) of the constraint expression, if option "verbose" is set. elementsMsg <- "" - if(verbose) { + if(getOption("verbose")) { subUnits <- "" for(i in 1:length(patientsetConstraints)) { subUnits <- paste(subUnits, paste("\n\tElement ", i,": ", .expressionToText(patientsetConstraints[[i]]), sep = "")) @@ -205,7 +204,7 @@ createPatientSet <- function(study.name, patientset.constraints, returnXMLquery firstElement_in_allowedOperators <- F } else { firstElement <- as.character(patientsetConstraints[[1]]) - firstElement_in_allowedOperators <- firstElement%in% allowedOperators + firstElement_in_allowedOperators <- firstElement %in% allowedOperators } if(length(patientsetConstraints) == 3 & firstElement_in_allowedOperators) {