Description
The Sample function draws rows randomly from the input tbl_teradata.
Usage
td_sampling_mle (
data = NULL,
data.partition.column = "ANY",
data.order.column = NULL,
summary.data = NULL,
summary.data.order.column = NULL,
stratum.column = NULL,
strata = NULL,
sample.fraction = NULL,
approx.sample.size = NULL,
seed = 0,
data.sequence.column = NULL,
summary.data.sequence.column = NULL
)
Arguments
data |
Required Argument. |
data.partition.column |
Optional Argument. |
data.order.column |
Optional Argument. |
summary.data |
Optional Argument. |
summary.data.order.column |
Optional Argument. |
stratum.column |
Optional Argument. |
strata |
Optional Argument. |
sample.fraction |
Optional Argument. |
approx.sample.size |
Optional Argument. |
seed |
Optional Argument. |
data.sequence.column |
Optional Argument. |
summary.data.sequence.column |
Optional Argument. |
Value
The function returns an object of class "td_sampling_mle", which is a
named list containing an object of class "tbl_teradata".
The named list member can be referenced directly with the "$" operator
using the name: result.
Examples
# Fetch the connection held by the current context.
con <- td_get_context()$connection

# Load the example data used by the examples below.
loadExampleData("sampling_example", "students", "score_category")

# Build the "tbl_teradata" objects that the examples sample from.
score_category <- tbl(con, "score_category")
students <- tbl(con, "students")
# Example 1: Draw a sample of roughly 20% of the rows of the
# students tbl_teradata.
td_sampling_out1 <- td_sampling_mle(
  data = students,
  sample.fraction = 0.2,
  seed = 2
)
# Example 2: Sample the categories fair, very good, and excellent at
# rates of 20%, 30%, and 40%, respectively; the number of rows sampled
# is rounded to the nearest integer.
td_sampling_out2 <- td_sampling_mle(
  data = score_category,
  data.partition.column = "stratum",
  stratum.column = "stratum",
  strata = c("fair", "very good", "excellent"),
  sample.fraction = c(0.2, 0.3, 0.4),
  seed = 2
)
# Example 3: This example demonstrates conditional sampling with an
# approximate sample size for each stratum.
# score_summary groups the score_category tbl_teradata by the stratum
# column and stores the row count of each group in stratum_count.
score_summary <- score_category %>%
  select(stratum) %>%
  count(stratum) %>%
  mutate(stratum_count = as.integer(n)) %>%
  select(-n)

# Argument formatting (spaces around "=", double-quoted strings) follows
# the same conventions as Examples 1 and 2.
td_sampling_out3 <- td_sampling_mle(
  data = score_category,
  summary.data = score_summary,
  stratum.column = "stratum",
  strata = c("excellent", "fair", "very good"),
  approx.sample.size = c(5, 10, 5),
  seed = 2
)