From 803f763c815f27d1ff1cfa16a21e1a6d9c17984b Mon Sep 17 00:00:00 2001 From: manmita Date: Sat, 10 Jan 2026 04:36:54 +0530 Subject: [PATCH 1/8] feat(2606): added shallow search for data.table in tables() --- NEWS.md | 3 ++ R/tables.R | 74 ++++++++++++++++++++++++++++++++++++++++--- inst/tests/tests.Rraw | 23 ++++++++++++++ 3 files changed, 95 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 256c7450ac..daa592c098 100644 --- a/NEWS.md +++ b/NEWS.md @@ -18,6 +18,9 @@ 2. `[,showProgress=]` and `options(datatable.showProgress)` now accept an integer to control the progress bar update interval in seconds, allowing finer control over progress reporting frequency; `TRUE` uses the default 3-second interval, [#6514](https://github.com/Rdatatable/data.table/issues/6514). Thanks @ethanbsmith for the report and @ben-schwen for the PR. + +3. `tables()` can now optionally report `data.table` objects stored one level deep inside list objects when `shallow_search=TRUE`, [#2606](https://github.com/Rdatatable/data.table/issues/2606). Thanks @MichaelChirico for the report and @manmita for the PR. + ### Notes 1. {data.table} now depends on R 3.5.0 (2018). diff --git a/R/tables.R b/R/tables.R index 960c74343f..8ab8dc7510 100644 --- a/R/tables.R +++ b/R/tables.R @@ -19,7 +19,8 @@ type_size = function(DT) { } tables = function(mb=type_size, order.col="NAME", width=80L, - env=parent.frame(), silent=FALSE, index=FALSE) + env=parent.frame(), silent=FALSE, index=FALSE, + shallow_search=FALSE) { # Prints name, size and colnames of all data.tables in the calling environment by default mb_name = as.character(substitute(mb)) @@ -27,11 +28,74 @@ tables = function(mb=type_size, order.col="NAME", width=80L, names = ls(envir=env, all.names=TRUE) # include "hidden" objects (starting with .) obj = mget(names, envir=env) # doesn't copy; mget is ok with ... 
unlike get, #5197 w = which(vapply_1b(obj, is.data.table)) - if (!length(w)) { - if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env)) - return(invisible(data.table(NULL))) + + info = NULL + # we check if shallow_search is requested and add found tables to w + if (shallow_search) { + is_list = vapply_1b(obj, is.list) + is_df = vapply_1b(obj, is.data.frame) + is_dt = vapply_1b(obj, is.data.table) + # list_index is a index of list which is not data.frame or data.table + list_index = which(is_list & !is_dt & !is_df) + # obj_list is a list of lists of data.tables found inside lists + obj_list = vector("list", length(list_index)) + #make a listof size list_index and add wl in it + total_dt = 0L + # filling obj_list and counting total_dt + for (i in seq_along(list_index)) { + L = obj[[list_index[i]]] + wl = which(vapply_1b(L, is.data.table)) + total_dt = total_dt + length(wl) + obj_list[[i]] = L[wl] + } + name_count = length(w) + total_dt + # initialize info data.table with total number of data.tables found + if (name_count == 0L) { + if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env)) + return(invisible(data.table(NULL))) + } + # create info data.table with total rows equal to number of data.tables found + info = data.table(NAME=character(name_count), NROW=0L, NCOL=0L, MB=0.0, COLS=list(), KEY=list(), INDICES=list()) + # fill in the names of data.tables found in w + for (i in seq_along(w)) { # names of w items + set(info, i, "NAME", names[w[i]]) + } + # now fill in the data.tables found inside lists + cnt = 1L + if (total_dt > 0L) { + for (i in seq_along(list_index)) { + if (length(obj_list[[i]]) == 0L) next + # get the parent list name + parent_name = names[list_index[i]] + for (j in seq_along(obj_list[[i]])) { + elem_names = names(obj[[list_index[i]]]) + if (!is.null(elem_names) && nzchar(elem_names[j])) { + new_name = 
paste0(parent_name, "$", elem_names[j]) + } else { + new_name = paste0(parent_name, "[[", j, "]]") + } + DT = obj_list[[i]][[j]] + k = cnt + length(w) # row number in info data.table + cnt = cnt + 1L + set(info, k, "NAME", new_name) + set(info, k, "NROW", nrow(DT)) + set(info, k, "NCOL", ncol(DT)) + if (is.function(mb)) set(info, k, "MB", as.integer(mb(DT)/1048576L)) # i.e. 1024**2 + if (!is.null(tt<-names(DT))) set(info, k, "COLS", tt) # TODO: don't need these if()s when #5526 is done + if (!is.null(tt<-key(DT))) set(info, k, "KEY", tt) + if (index && !is.null(tt<-indices(DT))) set(info, k, "INDICES", tt) + } + } + } + } + else { + # the original code path when shallow_search=FALSE + if (!length(w)) { + if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env)) + return(invisible(data.table(NULL))) + } + info = data.table(NAME=names[w], NROW=0L, NCOL=0L, MB=0.0, COLS=list(), KEY=list(), INDICES=list()) } - info = data.table(NAME=names[w], NROW=0L, NCOL=0L, MB=0.0, COLS=list(), KEY=list(), INDICES=list()) for (i in seq_along(w)) { # avoid rbindlist(lapply(DT_names)) in case of a large number of tables DT = obj[[w[i]]] set(info, i, "NROW", nrow(DT)) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index aba5720a60..b5b0f27a82 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -22030,3 +22030,26 @@ if (test_bit64) local({ merged$gforce_mean, merged$true_mean ) }) + +#2606 tables() shallow_search finds nested data.tables in lists +# creating env so that the names are within it +xenv2 = new.env() +xenv2$DT = data.table(a = 1L) +xenv2$L = list(data.table(a = 1, b = 4:6), data.table(a = 2, b = 7:10)) +xenv2$M = list(b = data.table(a = 1, b = 4:6), a = 1:5) +# Test for NAME column +test(2359.1, + tables(env = xenv2, shallow_search = TRUE)$NAME, + c("DT", "L[[1]]", "L[[2]]", "M$b") +) +# Test for NROW column +test(2359.2, + tables(env = xenv2, shallow_search = TRUE)$NROW, + c(1L, 
1L, 1L, 1L) +) +# Test for NCOL column +test(2359.3, + tables(env = xenv2, shallow_search = TRUE)$NCOL, + c(1L, 2L, 2L, 2L) +) +rm(xenv2) From 107a65b4d195202660ac6da0e252efba5a0c4788 Mon Sep 17 00:00:00 2001 From: manmita Date: Sat, 10 Jan 2026 04:47:58 +0530 Subject: [PATCH 2/8] feat(2606): re-numbered tests --- inst/tests/tests.Rraw | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 648ec3b46c..1b1e56eef2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -22043,17 +22043,17 @@ xenv2$DT = data.table(a = 1L) xenv2$L = list(data.table(a = 1, b = 4:6), data.table(a = 2, b = 7:10)) xenv2$M = list(b = data.table(a = 1, b = 4:6), a = 1:5) # Test for NAME column -test(2359.1, +test(2360.1, tables(env = xenv2, shallow_search = TRUE)$NAME, c("DT", "L[[1]]", "L[[2]]", "M$b") ) # Test for NROW column -test(2359.2, +test(2360.2, tables(env = xenv2, shallow_search = TRUE)$NROW, c(1L, 1L, 1L, 1L) ) # Test for NCOL column -test(2359.3, +test(2360.3, tables(env = xenv2, shallow_search = TRUE)$NCOL, c(1L, 2L, 2L, 2L) ) From ca8701e8dd12816757569110a299d2f968162db7 Mon Sep 17 00:00:00 2001 From: manmita Date: Sat, 10 Jan 2026 05:01:13 +0530 Subject: [PATCH 3/8] feat(2606): fixed 2360.2 test --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1b1e56eef2..f234f434d1 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -22050,7 +22050,7 @@ test(2360.1, # Test for NROW column test(2360.2, tables(env = xenv2, shallow_search = TRUE)$NROW, - c(1L, 1L, 1L, 1L) + c(1L, 3L, 4L, 3L) ) # Test for NCOL column test(2360.3, From 174b0ea8959e8117c0f078db9389c8a1edb2d5c1 Mon Sep 17 00:00:00 2001 From: manmita Date: Sat, 10 Jan 2026 05:38:18 +0530 Subject: [PATCH 4/8] feat(2606): added shallow_search arg into tables.Rd file --- man/tables.Rd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/man/tables.Rd b/man/tables.Rd index 5898cd7305..576455d441 100644 --- a/man/tables.Rd +++ b/man/tables.Rd @@ -6,7 +6,7 @@ } \usage{ tables(mb=type_size, order.col="NAME", width=80, - env=parent.frame(), silent=FALSE, index=FALSE) + env=parent.frame(), silent=FALSE, index=FALSE, shallow_search=FALSE) } \arguments{ \item{mb}{ a function which accepts a \code{data.table} and returns its size in bytes. By default, \code{type_size} (same as \code{TRUE}) provides a fast lower bound by excluding the size of character strings in R's global cache (which may be shared) and excluding the size of list column items (which also may be shared). A column \code{"MB"} is included in the output unless \code{FALSE} or \code{NULL}. } @@ -15,6 +15,7 @@ tables(mb=type_size, order.col="NAME", width=80, \item{env}{ An \code{environment}, typically the \code{.GlobalEnv} by default, see Details. } \item{silent}{ \code{logical}; should the output be printed? } \item{index}{ \code{logical}; if \code{TRUE}, the column \code{INDICES} is added to indicate the indices assorted with each object, see \code{\link{indices}}. } + \item{shallow_search}{ \code{logical}; if \code{TRUE}, \code{data.table} objects stored one level deep inside \code{list} objects found in \code{env} are also reported. } } \details{ Usually \code{tables()} is executed at the prompt, where \code{parent.frame()} returns \code{.GlobalEnv}. \code{tables()} may also be useful inside functions where \code{parent.frame()} is the local scope of the function; in such a scenario, simply set it to \code{.GlobalEnv} to get the same behaviour as at prompt. 
From 03e58c9df7b167e761ac195568787baccd525f3f Mon Sep 17 00:00:00 2001 From: manmita Date: Sat, 10 Jan 2026 05:47:35 +0530 Subject: [PATCH 5/8] feat(2606): added tests for mb = TRUE and empty env --- inst/tests/tests.Rraw | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f234f434d1..6ebbd3fec1 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -22057,4 +22057,12 @@ test(2360.3, tables(env = xenv2, shallow_search = TRUE)$NCOL, c(1L, 2L, 2L, 2L) ) +res = tables(mb = TRUE) +test(2360.5, all("MB" %in% names(res)) && is.numeric(res$MB), TRUE) rm(xenv2) +xenv_empty = new.env() +out = capture.output(res <- tables(env = xenv_empty, silent = FALSE)) +msg_ok = any(grepl("No objects of class data.table exist in", out)) +dt_ok = is.data.table(res) && nrow(res) == 0L +test(2360.6, msg_ok && dt_ok, TRUE) +rm(xenv_empty) From 3ba7c1b1a3a0a3e1e26e3db4578f83072c0eb0b9 Mon Sep 17 00:00:00 2001 From: manmita Date: Sat, 10 Jan 2026 06:35:14 +0530 Subject: [PATCH 6/8] feat(2606): add more tests to fix coverage issue --- R/tables.R | 2 ++ inst/tests/tests.Rraw | 22 ++++++++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/R/tables.R b/R/tables.R index 8ab8dc7510..83a3555670 100644 --- a/R/tables.R +++ b/R/tables.R @@ -51,7 +51,9 @@ tables = function(mb=type_size, order.col="NAME", width=80L, name_count = length(w) + total_dt # initialize info data.table with total number of data.tables found if (name_count == 0L) { + # nocov start. 
Requires long-running test case if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env)) + # nocov end return(invisible(data.table(NULL))) } # create info data.table with total rows equal to number of data.tables found diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6ebbd3fec1..36722a81e5 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -22042,6 +22042,7 @@ xenv2 = new.env() xenv2$DT = data.table(a = 1L) xenv2$L = list(data.table(a = 1, b = 4:6), data.table(a = 2, b = 7:10)) xenv2$M = list(b = data.table(a = 1, b = 4:6), a = 1:5) +xenv2$N = list(a = 1:5) # Test for NAME column test(2360.1, tables(env = xenv2, shallow_search = TRUE)$NAME, @@ -22057,12 +22058,21 @@ test(2360.3, tables(env = xenv2, shallow_search = TRUE)$NCOL, c(1L, 2L, 2L, 2L) ) -res = tables(mb = TRUE) -test(2360.5, all("MB" %in% names(res)) && is.numeric(res$MB), TRUE) +# Test for index and keys +setkey(xenv2$M$b, a) +setindex(xenv2$M$b, b) +test(2360.4, + tables(env = xenv2, shallow_search = TRUE, index = TRUE)$INDICES, + c(NULL, NULL, NULL, "b") +) +test(2360.5, + tables(env = xenv2, shallow_search = TRUE, index = TRUE)$KEY, + c(NULL, NULL, NULL, "a") +) rm(xenv2) + +# no data.table test xenv_empty = new.env() -out = capture.output(res <- tables(env = xenv_empty, silent = FALSE)) -msg_ok = any(grepl("No objects of class data.table exist in", out)) -dt_ok = is.data.table(res) && nrow(res) == 0L -test(2360.6, msg_ok && dt_ok, TRUE) +test(2360.6, tables(env = xenv_empty, shallow_search = TRUE), invisible(data.table(NULL))) +test(2360.7, tables(env = xenv_empty), invisible(data.table(NULL))) rm(xenv_empty) From 1e1fd11071625325cdef81cbbd00ce65ed0075fc Mon Sep 17 00:00:00 2001 From: manmita Date: Sat, 10 Jan 2026 06:41:48 +0530 Subject: [PATCH 7/8] feat(2606): fix tests 2360 for type --- inst/tests/tests.Rraw | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff 
--git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 36722a81e5..2fc41817da 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -22043,32 +22043,13 @@ xenv2$DT = data.table(a = 1L) xenv2$L = list(data.table(a = 1, b = 4:6), data.table(a = 2, b = 7:10)) xenv2$M = list(b = data.table(a = 1, b = 4:6), a = 1:5) xenv2$N = list(a = 1:5) -# Test for NAME column -test(2360.1, - tables(env = xenv2, shallow_search = TRUE)$NAME, - c("DT", "L[[1]]", "L[[2]]", "M$b") -) -# Test for NROW column -test(2360.2, - tables(env = xenv2, shallow_search = TRUE)$NROW, - c(1L, 3L, 4L, 3L) -) -# Test for NCOL column -test(2360.3, - tables(env = xenv2, shallow_search = TRUE)$NCOL, - c(1L, 2L, 2L, 2L) -) -# Test for index and keys setkey(xenv2$M$b, a) setindex(xenv2$M$b, b) -test(2360.4, - tables(env = xenv2, shallow_search = TRUE, index = TRUE)$INDICES, - c(NULL, NULL, NULL, "b") -) -test(2360.5, - tables(env = xenv2, shallow_search = TRUE, index = TRUE)$KEY, - c(NULL, NULL, NULL, "a") -) +test(2360.1, tables(env = xenv2, shallow_search = TRUE)$NAME, c("DT", "L[[1]]", "L[[2]]", "M$b")) +test(2360.2, tables(env = xenv2, shallow_search = TRUE)$NROW, c(1L, 3L, 4L, 3L)) +test(2360.3, tables(env = xenv2, shallow_search = TRUE)$NCOL, c(1L, 2L, 2L, 2L)) +test(2360.4, tables(env = xenv2, shallow_search = TRUE, index = TRUE)$INDICES, list(NULL, NULL, NULL, "b")) +test(2360.5, tables(env = xenv2, shallow_search = TRUE, key = TRUE)$KEY, list(NULL, NULL, NULL, "a")) rm(xenv2) # no data.table test From 195229052f9ff6c9b5b5b0b7c0b3b9c3dd7f3224 Mon Sep 17 00:00:00 2001 From: manmita Date: Sat, 10 Jan 2026 06:48:13 +0530 Subject: [PATCH 8/8] feat(2606): fix tests 2360 --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2fc41817da..278cb2ffbe 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -22049,7 +22049,7 @@ test(2360.1, tables(env = xenv2, shallow_search = TRUE)$NAME, c("DT", 
"L[[1]]", test(2360.2, tables(env = xenv2, shallow_search = TRUE)$NROW, c(1L, 3L, 4L, 3L)) test(2360.3, tables(env = xenv2, shallow_search = TRUE)$NCOL, c(1L, 2L, 2L, 2L)) test(2360.4, tables(env = xenv2, shallow_search = TRUE, index = TRUE)$INDICES, list(NULL, NULL, NULL, "b")) -test(2360.5, tables(env = xenv2, shallow_search = TRUE, key = TRUE)$KEY, list(NULL, NULL, NULL, "a")) +test(2360.5, tables(env = xenv2, shallow_search = TRUE, index = TRUE)$KEY, list(NULL, NULL, NULL, "a")) rm(xenv2) # no data.table test