Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

2. `[,showProgress=]` and `options(datatable.showProgress)` now accept an integer to control the progress bar update interval in seconds, allowing finer control over progress reporting frequency; `TRUE` uses the default 3-second interval, [#6514](https://github.com/Rdatatable/data.table/issues/6514). Thanks @ethanbsmith for the report and @ben-schwen for the PR.

3. `tables()` can now optionally report `data.table` objects stored one level deep inside list objects when `shallow_search=TRUE`, [#2606](https://github.com/Rdatatable/data.table/issues/2606). Thanks @MichaelChirico for the report and @manmita for the PR

### BUG FIXES

1. `fread()` with `skip=0` and `(header=TRUE|FALSE)` no longer skips the first row when it has fewer fields than subsequent rows, [#7463](https://github.com/Rdatatable/data.table/issues/7463). Thanks @emayerhofer for the report and @ben-schwen for the fix.
Expand Down
76 changes: 71 additions & 5 deletions R/tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,85 @@ type_size = function(DT) {
}

tables = function(mb=type_size, order.col="NAME", width=80L,
env=parent.frame(), silent=FALSE, index=FALSE)
env=parent.frame(), silent=FALSE, index=FALSE,
shallow_search=FALSE)
{
# Prints name, size and colnames of all data.tables in the calling environment by default
mb_name = as.character(substitute(mb))
if (isTRUE(mb)) { mb=type_size; mb_name="type_size" }
names = ls(envir=env, all.names=TRUE) # include "hidden" objects (starting with .)
obj = mget(names, envir=env) # doesn't copy; mget is ok with ... unlike get, #5197
w = which(vapply_1b(obj, is.data.table))
if (!length(w)) {
if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env))
return(invisible(data.table(NULL)))

info = NULL
# we check if shallow_search is requested and add found tables to w
if (shallow_search) {
is_list = vapply_1b(obj, is.list)
is_df = vapply_1b(obj, is.data.frame)
is_dt = vapply_1b(obj, is.data.table)
# list_index is a index of list which is not data.frame or data.table
list_index = which(is_list & !is_dt & !is_df)
# obj_list is a list of lists of data.tables found inside lists
obj_list = vector("list", length(list_index))
#make a listof size list_index and add wl in it
total_dt = 0L
# filling obj_list and counting total_dt
for (i in seq_along(list_index)) {
L = obj[[list_index[i]]]
wl = which(vapply_1b(L, is.data.table))
total_dt = total_dt + length(wl)
obj_list[[i]] = L[wl]
}
name_count = length(w) + total_dt
# initialize info data.table with total number of data.tables found
if (name_count == 0L) {
# nocov start. Requires long-running test case
if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env))
# nocov end
return(invisible(data.table(NULL)))
}
# create info data.table with total rows equal to number of data.tables found
info = data.table(NAME=character(name_count), NROW=0L, NCOL=0L, MB=0.0, COLS=list(), KEY=list(), INDICES=list())
# fill in the names of data.tables found in w
for (i in seq_along(w)) { # names of w items
set(info, i, "NAME", names[w[i]])
}
# now fill in the data.tables found inside lists
cnt = 1L
if (total_dt > 0L) {
for (i in seq_along(list_index)) {
if (length(obj_list[[i]]) == 0L) next
# get the parent list name
parent_name = names[list_index[i]]
for (j in seq_along(obj_list[[i]])) {
elem_names = names(obj[[list_index[i]]])
if (!is.null(elem_names) && nzchar(elem_names[j])) {
new_name = paste0(parent_name, "$", elem_names[j])
} else {
new_name = paste0(parent_name, "[[", j, "]]")
}
DT = obj_list[[i]][[j]]
k = cnt + length(w) # row number in info data.table
cnt = cnt + 1L
set(info, k, "NAME", new_name)
set(info, k, "NROW", nrow(DT))
set(info, k, "NCOL", ncol(DT))
if (is.function(mb)) set(info, k, "MB", as.integer(mb(DT)/1048576L)) # i.e. 1024**2
if (!is.null(tt<-names(DT))) set(info, k, "COLS", tt) # TODO: don't need these if()s when #5526 is done
if (!is.null(tt<-key(DT))) set(info, k, "KEY", tt)
if (index && !is.null(tt<-indices(DT))) set(info, k, "INDICES", tt)
}
}
}
}
else {
# the original code path when shallow_search=FALSE
if (!length(w)) {
if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env))
return(invisible(data.table(NULL)))
}
info = data.table(NAME=names[w], NROW=0L, NCOL=0L, MB=0.0, COLS=list(), KEY=list(), INDICES=list())
}
info = data.table(NAME=names[w], NROW=0L, NCOL=0L, MB=0.0, COLS=list(), KEY=list(), INDICES=list())
for (i in seq_along(w)) { # avoid rbindlist(lapply(DT_names)) in case of a large number of tables
DT = obj[[w[i]]]
set(info, i, "NROW", nrow(DT))
Expand Down
22 changes: 22 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -22035,3 +22035,25 @@ if (test_bit64) local({
# 7407 Test for fread() handling \x1A (ASCII SUB) at end of input
txt = paste0("foo\n", strrep("a", 4096 * 100), "\x1A")
test(2359.1, nchar(fread(txt)$foo), 409600L)

#2606 tables() shallow_search finds nested data.tables in lists
# creating env so that the names are within it
xenv2 = new.env()
xenv2$DT = data.table(a = 1L)
xenv2$L = list(data.table(a = 1, b = 4:6), data.table(a = 2, b = 7:10))
xenv2$M = list(b = data.table(a = 1, b = 4:6), a = 1:5)
xenv2$N = list(a = 1:5)
setkey(xenv2$M$b, a)
setindex(xenv2$M$b, b)
test(2360.1, tables(env = xenv2, shallow_search = TRUE)$NAME, c("DT", "L[[1]]", "L[[2]]", "M$b"))
test(2360.2, tables(env = xenv2, shallow_search = TRUE)$NROW, c(1L, 3L, 4L, 3L))
test(2360.3, tables(env = xenv2, shallow_search = TRUE)$NCOL, c(1L, 2L, 2L, 2L))
test(2360.4, tables(env = xenv2, shallow_search = TRUE, index = TRUE)$INDICES, list(NULL, NULL, NULL, "b"))
test(2360.5, tables(env = xenv2, shallow_search = TRUE, index = TRUE)$KEY, list(NULL, NULL, NULL, "a"))
rm(xenv2)

# no data.table test
xenv_empty = new.env()
test(2360.6, tables(env = xenv_empty, shallow_search = TRUE), invisible(data.table(NULL)))
test(2360.7, tables(env = xenv_empty), invisible(data.table(NULL)))
rm(xenv_empty)
3 changes: 2 additions & 1 deletion man/tables.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
}
\usage{
tables(mb=type_size, order.col="NAME", width=80,
env=parent.frame(), silent=FALSE, index=FALSE)
env=parent.frame(), silent=FALSE, index=FALSE, shallow_search=FALSE)
}
\arguments{
\item{mb}{ a function which accepts a \code{data.table} and returns its size in bytes. By default, \code{type_size} (same as \code{TRUE}) provides a fast lower bound by excluding the size of character strings in R's global cache (which may be shared) and excluding the size of list column items (which also may be shared). A column \code{"MB"} is included in the output unless \code{FALSE} or \code{NULL}. }
Expand All @@ -15,6 +15,7 @@ tables(mb=type_size, order.col="NAME", width=80,
\item{env}{ An \code{environment}, typically the \code{.GlobalEnv} by default, see Details. }
\item{silent}{ \code{logical}; should the output be printed? }
\item{index}{ \code{logical}; if \code{TRUE}, the column \code{INDICES} is added to indicate the indices assorted with each object, see \code{\link{indices}}. }
\item{shallow_search}{\code{logical}; if \code{TRUE}, searches for \code{data.table} objects inside top-level lists}
}
\details{
Usually \code{tables()} is executed at the prompt, where \code{parent.frame()} returns \code{.GlobalEnv}. \code{tables()} may also be useful inside functions where \code{parent.frame()} is the local scope of the function; in such a scenario, simply set it to \code{.GlobalEnv} to get the same behaviour as at prompt.
Expand Down
Loading