Scans the WORCS project file for data that have been saved using
open_data
or closed_data
, and loads these data
into the global (working) environment. The function will load the original
data if available on the current system. If only a synthetic dataset is
available, this function loads the synthetic data.
The name of the object containing the data is derived from the file name by
removing the file extension, and, when applicable, the prefix
"synthetic_"
. Thus, both "data.csv"
and
"synthetic_data.csv"
will be loaded into an object called data
.
Usage
load_data(
worcs_directory = ".",
to_envir = TRUE,
envir = parent.frame(1),
verbose = TRUE,
use_metadata = TRUE
)
Arguments
- worcs_directory
Character, indicating the WORCS project directory from which to load data. The default value
"."
points to the current directory.- to_envir
Logical, indicating whether to load objects directly into the environment, or return a
list
containing the objects. The environment is designated by argumentenvir
. Loading objects directly into the global environment is user-friendly, but has the risk of overwriting an existing object with the same name, as explained inload
. The functionload_data
gives a warning when this happens.- envir
The environment where the data should be loaded. The default value
parent.frame(1)
refers to the global environment in an interactive session.- verbose
Logical. Whether or not to print status messages to the console. Default: TRUE
- use_metadata
Logical. Whether or not to use the codebook and value labels and attempt to coerce the class and values of variables to those recorded therein. Default: TRUE
Value
Returns a list invisibly. If to_envir = TRUE
, this list
contains the loaded data files. If to_envir = FALSE
, the list is
empty, and the loaded data files are attached directly to the global
environment.
Examples
test_dir <- file.path(tempdir(), "loaddata")
old_wd <- getwd()
dir.create(test_dir)
setwd(test_dir)
worcs:::write_worcsfile(".worcs")
df <- iris[1:5, ]
suppressWarnings(closed_data(df, codebook = NULL))
#> ✔ Storing original data in 'df.csv' and updating the checksum in '.worcs'.
#> ✔ Generating synthetic data for public use. Ensure that no identifying information is included.
#>
|
| | 0%
|
|============== | 20%
|
|============================ | 40%
|
|========================================== | 60%
|
|======================================================== | 80%
|
|======================================================================| 100%
#> ✔ Storing synthetic data in 'synthetic_df.csv' and updating the checksum in '.worcs'.
#> ✔ Updating '.gitignore'.
#> ✔ Updating '.gitignore'.
#> ✔ Storing value labels in 'value_labels_df.yml'.
load_data()
#> The following objects are masked from check_data_resources(dn_worcs = dn_worcs, worcsfile = worcsfile, verbose = verbose) (pos = 5):
#>
#> data_files, data_files_synth, data_original, fn_data_files
#> ✔ Loading original data from 'df.csv'.
#> ✖ No valid codebook found.
#> Warning: Object 'df' already exists in the environment designated by 'envir', and will be replaced with the contents of 'df.csv'.
data
#> function (..., list = character(), package = NULL, lib.loc = NULL,
#> verbose = getOption("verbose"), envir = .GlobalEnv, overwrite = TRUE)
#> {
#> fileExt <- function(x) {
#> db <- grepl("\\.[^.]+\\.(gz|bz2|xz)$", x)
#> ans <- sub(".*\\.", "", x)
#> ans[db] <- sub(".*\\.([^.]+\\.)(gz|bz2|xz)$", "\\1\\2",
#> x[db])
#> ans
#> }
#> my_read_table <- function(...) {
#> lcc <- Sys.getlocale("LC_COLLATE")
#> on.exit(Sys.setlocale("LC_COLLATE", lcc))
#> Sys.setlocale("LC_COLLATE", "C")
#> read.table(...)
#> }
#> stopifnot(is.character(list))
#> names <- c(as.character(substitute(list(...))[-1L]), list)
#> if (!is.null(package)) {
#> if (!is.character(package))
#> stop("'package' must be a character vector or NULL")
#> }
#> paths <- find.package(package, lib.loc, verbose = verbose)
#> if (is.null(lib.loc))
#> paths <- c(path.package(package, TRUE), if (!length(package)) getwd(),
#> paths)
#> paths <- unique(normalizePath(paths[file.exists(paths)]))
#> paths <- paths[dir.exists(file.path(paths, "data"))]
#> dataExts <- tools:::.make_file_exts("data")
#> if (length(names) == 0L) {
#> db <- matrix(character(), nrow = 0L, ncol = 4L)
#> for (path in paths) {
#> entries <- NULL
#> packageName <- if (file_test("-f", file.path(path,
#> "DESCRIPTION")))
#> basename(path)
#> else "."
#> if (file_test("-f", INDEX <- file.path(path, "Meta",
#> "data.rds"))) {
#> entries <- readRDS(INDEX)
#> }
#> else {
#> dataDir <- file.path(path, "data")
#> entries <- tools::list_files_with_type(dataDir,
#> "data")
#> if (length(entries)) {
#> entries <- unique(tools::file_path_sans_ext(basename(entries)))
#> entries <- cbind(entries, "")
#> }
#> }
#> if (NROW(entries)) {
#> if (is.matrix(entries) && ncol(entries) == 2L)
#> db <- rbind(db, cbind(packageName, dirname(path),
#> entries))
#> else warning(gettextf("data index for package %s is invalid and will be ignored",
#> sQuote(packageName)), domain = NA, call. = FALSE)
#> }
#> }
#> colnames(db) <- c("Package", "LibPath", "Item", "Title")
#> footer <- if (missing(package))
#> paste0("Use ", sQuote(paste("data(package =", ".packages(all.available = TRUE))")),
#> "\n", "to list the data sets in all *available* packages.")
#> else NULL
#> y <- list(title = "Data sets", header = NULL, results = db,
#> footer = footer)
#> class(y) <- "packageIQR"
#> return(y)
#> }
#> paths <- file.path(paths, "data")
#> for (name in names) {
#> found <- FALSE
#> for (p in paths) {
#> tmp_env <- if (overwrite)
#> envir
#> else new.env()
#> if (file_test("-f", file.path(p, "Rdata.rds"))) {
#> rds <- readRDS(file.path(p, "Rdata.rds"))
#> if (name %in% names(rds)) {
#> found <- TRUE
#> if (verbose)
#> message(sprintf("name=%s:\t found in Rdata.rds",
#> name), domain = NA)
#> thispkg <- sub(".*/([^/]*)/data$", "\\1", p)
#> thispkg <- sub("_.*$", "", thispkg)
#> thispkg <- paste0("package:", thispkg)
#> objs <- rds[[name]]
#> lazyLoad(file.path(p, "Rdata"), envir = tmp_env,
#> filter = function(x) x %in% objs)
#> break
#> }
#> else if (verbose)
#> message(sprintf("name=%s:\t NOT found in names() of Rdata.rds, i.e.,\n\t%s\n",
#> name, paste(names(rds), collapse = ",")),
#> domain = NA)
#> }
#> files <- list.files(p, full.names = TRUE)
#> files <- files[grep(name, files, fixed = TRUE)]
#> if (length(files) > 1L) {
#> o <- match(fileExt(files), dataExts, nomatch = 100L)
#> paths0 <- dirname(files)
#> paths0 <- factor(paths0, levels = unique(paths0))
#> files <- files[order(paths0, o)]
#> }
#> if (length(files)) {
#> for (file in files) {
#> if (verbose)
#> message("name=", name, ":\t file= ...", .Platform$file.sep,
#> basename(file), "::\t", appendLF = FALSE,
#> domain = NA)
#> ext <- fileExt(file)
#> if (basename(file) != paste0(name, ".", ext))
#> found <- FALSE
#> else {
#> found <- TRUE
#> switch(ext, R = , r = {
#> library("utils")
#> sys.source(file, chdir = TRUE, envir = tmp_env)
#> }, RData = , rdata = , rda = load(file, envir = tmp_env),
#> TXT = , txt = , tab = , tab.gz = , tab.bz2 = ,
#> tab.xz = , txt.gz = , txt.bz2 = , txt.xz = assign(name,
#> my_read_table(file, header = TRUE, as.is = FALSE),
#> envir = tmp_env), CSV = , csv = , csv.gz = ,
#> csv.bz2 = , csv.xz = assign(name, my_read_table(file,
#> header = TRUE, sep = ";", as.is = FALSE),
#> envir = tmp_env), found <- FALSE)
#> }
#> if (found)
#> break
#> }
#> if (verbose)
#> message(if (!found)
#> "*NOT* ", "found", domain = NA)
#> }
#> if (found)
#> break
#> }
#> if (!found) {
#> warning(gettextf("data set %s not found", sQuote(name)),
#> domain = NA)
#> }
#> else if (!overwrite) {
#> for (o in ls(envir = tmp_env, all.names = TRUE)) {
#> if (exists(o, envir = envir, inherits = FALSE))
#> warning(gettextf("an object named %s already exists and will not be overwritten",
#> sQuote(o)))
#> else assign(o, get(o, envir = tmp_env, inherits = FALSE),
#> envir = envir)
#> }
#> rm(tmp_env)
#> }
#> }
#> invisible(names)
#> }
#> <bytecode: 0x0000021027490b00>
#> <environment: namespace:utils>
rm("data")
#> Warning: object 'data' not found
file.remove("data.csv")
#> Warning: cannot remove file 'data.csv', reason 'No such file or directory'
#> [1] FALSE
load_data()
#> The following objects are masked from check_data_resources(dn_worcs = dn_worcs, worcsfile = worcsfile, verbose = verbose) (pos = 3):
#>
#> data_files, data_files_synth, data_original, fn_data_files
#> The following objects are masked from check_data_resources(dn_worcs = dn_worcs, worcsfile = worcsfile, verbose = verbose) (pos = 6):
#>
#> data_files, data_files_synth, data_original, fn_data_files
#> ✔ Loading original data from 'df.csv'.
#> ✖ No valid codebook found.
#> Warning: Object 'df' already exists in the environment designated by 'envir', and will be replaced with the contents of 'df.csv'.
data
#> function (..., list = character(), package = NULL, lib.loc = NULL,
#> verbose = getOption("verbose"), envir = .GlobalEnv, overwrite = TRUE)
#> {
#> fileExt <- function(x) {
#> db <- grepl("\\.[^.]+\\.(gz|bz2|xz)$", x)
#> ans <- sub(".*\\.", "", x)
#> ans[db] <- sub(".*\\.([^.]+\\.)(gz|bz2|xz)$", "\\1\\2",
#> x[db])
#> ans
#> }
#> my_read_table <- function(...) {
#> lcc <- Sys.getlocale("LC_COLLATE")
#> on.exit(Sys.setlocale("LC_COLLATE", lcc))
#> Sys.setlocale("LC_COLLATE", "C")
#> read.table(...)
#> }
#> stopifnot(is.character(list))
#> names <- c(as.character(substitute(list(...))[-1L]), list)
#> if (!is.null(package)) {
#> if (!is.character(package))
#> stop("'package' must be a character vector or NULL")
#> }
#> paths <- find.package(package, lib.loc, verbose = verbose)
#> if (is.null(lib.loc))
#> paths <- c(path.package(package, TRUE), if (!length(package)) getwd(),
#> paths)
#> paths <- unique(normalizePath(paths[file.exists(paths)]))
#> paths <- paths[dir.exists(file.path(paths, "data"))]
#> dataExts <- tools:::.make_file_exts("data")
#> if (length(names) == 0L) {
#> db <- matrix(character(), nrow = 0L, ncol = 4L)
#> for (path in paths) {
#> entries <- NULL
#> packageName <- if (file_test("-f", file.path(path,
#> "DESCRIPTION")))
#> basename(path)
#> else "."
#> if (file_test("-f", INDEX <- file.path(path, "Meta",
#> "data.rds"))) {
#> entries <- readRDS(INDEX)
#> }
#> else {
#> dataDir <- file.path(path, "data")
#> entries <- tools::list_files_with_type(dataDir,
#> "data")
#> if (length(entries)) {
#> entries <- unique(tools::file_path_sans_ext(basename(entries)))
#> entries <- cbind(entries, "")
#> }
#> }
#> if (NROW(entries)) {
#> if (is.matrix(entries) && ncol(entries) == 2L)
#> db <- rbind(db, cbind(packageName, dirname(path),
#> entries))
#> else warning(gettextf("data index for package %s is invalid and will be ignored",
#> sQuote(packageName)), domain = NA, call. = FALSE)
#> }
#> }
#> colnames(db) <- c("Package", "LibPath", "Item", "Title")
#> footer <- if (missing(package))
#> paste0("Use ", sQuote(paste("data(package =", ".packages(all.available = TRUE))")),
#> "\n", "to list the data sets in all *available* packages.")
#> else NULL
#> y <- list(title = "Data sets", header = NULL, results = db,
#> footer = footer)
#> class(y) <- "packageIQR"
#> return(y)
#> }
#> paths <- file.path(paths, "data")
#> for (name in names) {
#> found <- FALSE
#> for (p in paths) {
#> tmp_env <- if (overwrite)
#> envir
#> else new.env()
#> if (file_test("-f", file.path(p, "Rdata.rds"))) {
#> rds <- readRDS(file.path(p, "Rdata.rds"))
#> if (name %in% names(rds)) {
#> found <- TRUE
#> if (verbose)
#> message(sprintf("name=%s:\t found in Rdata.rds",
#> name), domain = NA)
#> thispkg <- sub(".*/([^/]*)/data$", "\\1", p)
#> thispkg <- sub("_.*$", "", thispkg)
#> thispkg <- paste0("package:", thispkg)
#> objs <- rds[[name]]
#> lazyLoad(file.path(p, "Rdata"), envir = tmp_env,
#> filter = function(x) x %in% objs)
#> break
#> }
#> else if (verbose)
#> message(sprintf("name=%s:\t NOT found in names() of Rdata.rds, i.e.,\n\t%s\n",
#> name, paste(names(rds), collapse = ",")),
#> domain = NA)
#> }
#> files <- list.files(p, full.names = TRUE)
#> files <- files[grep(name, files, fixed = TRUE)]
#> if (length(files) > 1L) {
#> o <- match(fileExt(files), dataExts, nomatch = 100L)
#> paths0 <- dirname(files)
#> paths0 <- factor(paths0, levels = unique(paths0))
#> files <- files[order(paths0, o)]
#> }
#> if (length(files)) {
#> for (file in files) {
#> if (verbose)
#> message("name=", name, ":\t file= ...", .Platform$file.sep,
#> basename(file), "::\t", appendLF = FALSE,
#> domain = NA)
#> ext <- fileExt(file)
#> if (basename(file) != paste0(name, ".", ext))
#> found <- FALSE
#> else {
#> found <- TRUE
#> switch(ext, R = , r = {
#> library("utils")
#> sys.source(file, chdir = TRUE, envir = tmp_env)
#> }, RData = , rdata = , rda = load(file, envir = tmp_env),
#> TXT = , txt = , tab = , tab.gz = , tab.bz2 = ,
#> tab.xz = , txt.gz = , txt.bz2 = , txt.xz = assign(name,
#> my_read_table(file, header = TRUE, as.is = FALSE),
#> envir = tmp_env), CSV = , csv = , csv.gz = ,
#> csv.bz2 = , csv.xz = assign(name, my_read_table(file,
#> header = TRUE, sep = ";", as.is = FALSE),
#> envir = tmp_env), found <- FALSE)
#> }
#> if (found)
#> break
#> }
#> if (verbose)
#> message(if (!found)
#> "*NOT* ", "found", domain = NA)
#> }
#> if (found)
#> break
#> }
#> if (!found) {
#> warning(gettextf("data set %s not found", sQuote(name)),
#> domain = NA)
#> }
#> else if (!overwrite) {
#> for (o in ls(envir = tmp_env, all.names = TRUE)) {
#> if (exists(o, envir = envir, inherits = FALSE))
#> warning(gettextf("an object named %s already exists and will not be overwritten",
#> sQuote(o)))
#> else assign(o, get(o, envir = tmp_env, inherits = FALSE),
#> envir = envir)
#> }
#> rm(tmp_env)
#> }
#> }
#> invisible(names)
#> }
#> <bytecode: 0x0000021027490b00>
#> <environment: namespace:utils>
setwd(old_wd)
unlink(test_dir, recursive = TRUE)