Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# 5.8.2
- Bugfix: `ldiversity()` now computes the distinct l-diversity measure correctly when key variables contain `NA`s
* improved/simplified underlying C++ code
* added Unit-Tests for `ldiversity()`
- Fixing some Header-Definitions for CRAN-Compliance
- Updated Unit-Tests for `pram()`

# 5.8.1
- New AI-assisted anonymization features:
+ `AI_createSdcObj()`: LLM-assisted variable classification into SDC roles
Expand Down
117 changes: 83 additions & 34 deletions R/measure_risk.R
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,18 @@ measure_riskWORK <- function(data, keyVars, w=NULL, missing=-999, hid=NULL, max_
#' @param missing a integer value to be used as missing value in the C++ routine
#' @param ldiv_index indices (or names) of the variables used for l-diversity
#' @param l_recurs_c l-Diversity Constant
ldiversity <- function(obj, ldiv_index=NULL, l_recurs_c=2, missing=-999, ...) {
ldiversityX(obj=obj, ldiv_index=ldiv_index, l_recurs_c=l_recurs_c, missing=missing, ...)
ldiversity <- function(obj,
ldiv_index = NULL,
l_recurs_c = 2,
missing = -999,
...) {
ldiversityX(
obj = obj,
ldiv_index = ldiv_index,
l_recurs_c = l_recurs_c,
missing = missing,
...
)
}

setGeneric("ldiversityX", function(obj, ldiv_index=NULL, l_recurs_c=2, missing=-999, ...) {
Expand All @@ -321,23 +331,27 @@ definition=function(obj, ldiv_index=NULL, l_recurs_c=2, missing=-999) {
n <- obj@manipNumVars
s <- obj@manipStrataVar
ldiv_index <- ldiv_index
if ( is.null(ldiv_index) ) {
if (is.null(ldiv_index)) {
sensVar <- get.sdcMicroObj(obj, "sensibleVar")
if ( is.null(sensVar) ) {
if (is.null(sensVar)) {
err <- paste0("You need to specify argument 'sensibleVar' in 'createSdcObj()'")
err <- paste0(err, " or specify it directly (argument 'ldiv_index') so that the")
err <- paste0(err,
" or specify it directly (argument 'ldiv_index') so that the")
err <- paste0(err, " ldiversity risk-measure can be calculated!\n")
stop(err)
} else{
ldiv_index <- sensVar
}
}
if (!is.null(k))
if (!is.null(k)) {
o[, colnames(k)] <- k
if (!is.null(n))
}
if (!is.null(n)) {
o[, colnames(n)] <- n
if (!is.null(s))
}
if (!is.null(s)) {
o$sdcGUI_strataVar <- s
}
kV <- colnames(obj@origData)[get.sdcMicroObj(obj, "keyVars")]
obj@risk$ldiversity <- ldiversityWORK(
data = o,
Expand Down Expand Up @@ -371,48 +385,83 @@ ldiversityWORK <- function(data, keyVars, ldiv_index, missing=-999, l_recurs_c=2
stop("Please define valid key variables", call. = FALSE)
}
}

# Index of sensitive variable(s)
if (!is.null(ldiv_index)) {
if (is.numeric(ldiv_index)) {
ldiv_var <- colnames(data)[ldiv_index]
ldiv_index <- length(variables) + 1:length(ldiv_index)
} else if (is.character(ldiv_index)) {
ldiv_var <- ldiv_index
ldiv_index <- length(variables) + 1:length(ldiv_index)
}
if (any(ldiv_var %in% variables))

# Calculate the 1-based index for the C++ matrix (KeyVars + SensVars)
ldiv_index_cpp <- length(variables) + 1:length(ldiv_index)

if (any(ldiv_var %in% variables)) {
stop("Sensitivity variable should not be a keyVariable")
} else ldiv_var <- character(0)
}
} else {
ldiv_var <- character(0)
ldiv_index_cpp <- -99
}

# Prep data (factors/strings -> numeric)
n_key_vars <- length(variables)
dataX <- data[, c(variables, ldiv_var), drop=FALSE]
for (i in 1:ncol(dataX)) {
if (!is.numeric(dataX[, i]))
dataX[, i] <- as.numeric(unlist(dataX[, i]))
if (!is.numeric(dataX[, i])) {
dataX[, i] <- as.numeric(as.factor(dataX[, i]))
}
}
dataX <- as.matrix(dataX)
ind <- do.call(order, data.frame(dataX))
dataX <- dataX[ind, , drop=FALSE]
ind <- order(c(1:nrow(dataX))[ind])
if (is.null(ldiv_index))
ldiv_index=-99
if (length(ldiv_index) > 5)

# Order data for C++ Function
# Matrix is ordered in a way so that NAs are grouped together for the C++ group-matching
# na.last = TRUE ensures that NAs appear at the end of their respective groups
ind <- do.call(order, c(as.data.frame(dataX), list(na.last = TRUE)))
dataX_sorted <- dataX[ind, , drop = FALSE]

# We need an index to be able to restore original order after
# calling the c++ function
back_ind <- order(ind)

# Call C++ function
if (length(ldiv_index_cpp) > 5) {
stop("Maximal number of sensitivity variables is 5")
res <- measure_risk_cpp(dataX, 0, n_key_vars, l_recurs_c, ldiv_index, missing)
res$Fk <- res$Res[, 3]
res$Res <- res$Res[ind, ]
if (all(ldiv_index != -99)) {
res$Mat_Risk <- res$Mat_Risk[ind, ]
names(res)[names(res) == "Mat_Risk"] <- "ldiversity"
colnames(res$ldiversity) <- c(paste(rep(ldiv_var, each=3), rep(c("Distinct_Ldiversity",
"Entropy_Ldiversity", "Recursive_Ldiversity"), length(ldiv_index)), sep="_"),
"MultiEntropy_Ldiversity", "MultiRecursive_Ldiversity")
}

res <- measure_risk_cpp(
data = dataX_sorted,
weighted_R = 0,
n_key_vars_R = n_key_vars,
l_recurs_c_R = l_recurs_c,
ldiv_index_R = ldiv_index_cpp,
missing_value_R = missing
)

# Re-order results back to original order
res$Fk <- res$Res[back_ind, 3]

if (all(ldiv_index_cpp != -99)) {
# Reorder the risk matrix to match original data input
ldiv_mat <- res$Mat_Risk[back_ind, , drop = FALSE]

# Specify column names
col_names <- c(paste(rep(ldiv_var, each = 3), rep(
c(
"Distinct_Ldiversity",
"Entropy_Ldiversity",
"Recursive_Ldiversity"
), length(ldiv_var)), sep = "_"),
"MultiEntropy_Ldiversity",
"MultiRecursive_Ldiversity")
colnames(ldiv_mat) <- col_names
res_final <- ldiv_mat
} else {
res <- res[names(res) != "Mat_Risk"]
res_final <- res$Res[back_ind, ]
}
ind <- order(res$Res[, 1], decreasing=TRUE)
res <- res$ldiversity
class(res) <- "ldiversity"
invisible(res)
class(res_final) <- "ldiversity"
invisible(res_final)
}

#' Print method for objects of class measure_risk
Expand Down
47 changes: 21 additions & 26 deletions src/Framework.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ typedef int BOOL;

// ============================= Display Messages =================================
inline extern char g_TxtBuffer[1024]; // character TxtBufferfer to display messages
static int OS_Printf(const char *Str, ...);
inline int OS_Printf(const char *Str, ...);

// ============================= Assert =================================
#ifdef _DEBUG
Expand Down Expand Up @@ -252,16 +252,16 @@ inline extern int g_NbNew;
#endif
#endif // _MSC_VER

static char *Strncpy(char *Dst, const char *Src, int Max, BOOL Warn = TRUE);
static char *ReplaceChar(char *Str, char OldChar, char NewChar);
static char *Stristr(char *Ptr, char *SubString, BOOL LeaveAfter = FALSE, BOOL ReturnNULL = TRUE);
inline static char *Strncpy(char *Dst, const char *Src, int Max, BOOL Warn = TRUE);
inline static char *ReplaceChar(char *Str, char OldChar, char NewChar);
inline static char *Stristr(char *Ptr, char *SubString, BOOL LeaveAfter = FALSE, BOOL ReturnNULL = TRUE);

//=== Parsing
static char *RemoveComment(char *Ptr, int Size = -1); // remove text between /* & */
static char *GoToNextLine(char *Ptr); // renvoie Ptr avanc� jusqu'apr�s le '\n' suivant
static char *GoTo1stChar(char *Ptr);
static char *ParseString(char *Ptr, char *Str, int Size, BOOL AdvanceTo1stChar = TRUE);
static char *ParseLine(char *Ptr, char *Str, int Size, BOOL AdvanceTo1stChar = TRUE);
//=== Parsing
inline static char *RemoveComment(char *Ptr, int Size = -1); // remove text between /* & */
inline static char *GoToNextLine(char *Ptr); // returns the ptr moved forward to the character right after the next newline
inline static char *GoTo1stChar(char *Ptr);
inline static char *ParseString(char *Ptr, char *Str, int Size, BOOL AdvanceTo1stChar = TRUE);
inline static char *ParseLine(char *Ptr, char *Str, int Size, BOOL AdvanceTo1stChar = TRUE);

//============================================= Time function

Expand All @@ -284,7 +284,7 @@ int gettimeofday(struct timeval *tv, struct timezone *tz);

#endif // _MSC_VER

static uint TimeGetMilliSecond(void);
inline static uint TimeGetMilliSecond(void);

// ============================= CTooFile =============================
class CTooFile
Expand Down Expand Up @@ -618,7 +618,7 @@ inline int SubMain(int argc, char *argv[])
#include <iostream>
#endif

char g_TxtBuffer[1024]; // character TxtBufferfer to display messages
inline char g_TxtBuffer[1024]; // character TxtBufferfer to display messages

int OS_Printf(const char *Str, ...)
{
Expand Down Expand Up @@ -703,10 +703,7 @@ int stricmp(char *str1, char *str2)
}
#endif // _MSC_VER

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-function"

char *Strncpy(char *Dst, const char *Src, int Max, BOOL Warn)
inline char *Strncpy(char *Dst, const char *Src, int Max, BOOL Warn)
{
if (Max > 0)
{
Expand All @@ -716,7 +713,7 @@ char *Strncpy(char *Dst, const char *Src, int Max, BOOL Warn)
return Dst;
}

char *ReplaceChar(char *Str, char OldChar, char NewChar)
inline char *ReplaceChar(char *Str, char OldChar, char NewChar)
{
char *Ret = Str;

Expand All @@ -731,7 +728,7 @@ char *ReplaceChar(char *Str, char OldChar, char NewChar)
return Ret;
}

char *Stristr(char *Ptr, char *SubString, BOOL LeaveAfter, BOOL ReturnNULL)
inline char *Stristr(char *Ptr, char *SubString, BOOL LeaveAfter, BOOL ReturnNULL)
{
int l = (int) strlen(SubString);

Expand All @@ -753,7 +750,7 @@ char *Stristr(char *Ptr, char *SubString, BOOL LeaveAfter, BOOL ReturnNULL)
}

///============================================= Parsing
char *RemoveComment(char *Ptr, int Size)
inline char *RemoveComment(char *Ptr, int Size)
{
if (Size < 0)
Size = (int) strlen(Ptr) + 1;
Expand Down Expand Up @@ -804,7 +801,7 @@ char *RemoveComment(char *Ptr, int Size)
}


char *GoToNextLine(char *Ptr)
inline char *GoToNextLine(char *Ptr)
{
ASSERT(Ptr != NULL);

Expand All @@ -829,7 +826,7 @@ char *GoToNextLine(char *Ptr)
}


char *GoTo1stChar(char *Ptr)
inline char *GoTo1stChar(char *Ptr)
{
while ((*Ptr == ' ' || *Ptr == '\t') && *Ptr != 0 && *Ptr != '\r' && *Ptr != '\n')
++Ptr;
Expand All @@ -838,7 +835,7 @@ char *GoTo1stChar(char *Ptr)
}


char *ParseString(char *Ptr, char *Str, int Size, BOOL AdvanceTo1stChar)
inline char *ParseString(char *Ptr, char *Str, int Size, BOOL AdvanceTo1stChar)
{
//BOOL Warn = FALSE;
int i = 0;
Expand Down Expand Up @@ -892,7 +889,7 @@ char *ParseString(char *Ptr, char *Str, int Size, BOOL AdvanceTo1stChar)
}


char *ParseLine(char *Ptr, char *Str, int Size, BOOL AdvanceTo1stChar)
inline char *ParseLine(char *Ptr, char *Str, int Size, BOOL AdvanceTo1stChar)
{
//BOOL Warn = FALSE;
int i = 0;
Expand Down Expand Up @@ -987,7 +984,7 @@ int gettimeofday(struct timeval *tv, struct timezone *tz)

#endif // _MSC_VER

uint TimeGetMilliSecond(void)
inline uint TimeGetMilliSecond(void)
{
struct timeval tv;

Expand All @@ -996,8 +993,6 @@ uint TimeGetMilliSecond(void)
return (tv.tv_sec & 0x000FFFFFF) * 1000 + tv.tv_usec / 1000;
}

#pragma GCC diagnostic pop

// ===============================================================================
//
// CTooFile
Expand Down
Loading
Loading