Description
The Scale function uses statistical information from the ScaleMap
(td_scale_map_mle) function to scale the input data set.
Usage
td_scale_mle (
object = NULL,
data = NULL,
method = NULL,
global = FALSE,
accumulate = NULL,
multiplier = 1,
intercept = "0",
input.columns = NULL,
object.sequence.column = NULL,
data.sequence.column = NULL,
object.order.column = NULL,
data.order.column = NULL
)
Arguments
object |
Required Argument. |
object.order.column |
Optional Argument. |
data |
Required Argument. |
data.order.column |
Optional Argument. |
method |
Required Argument. |
global |
Optional Argument. |
accumulate |
Optional Argument. |
multiplier |
Optional Argument. |
intercept |
Optional Argument. |
input.columns |
Optional Argument. |
object.sequence.column |
Optional Argument. |
data.sequence.column |
Optional Argument. |
Value
The function returns an object of class "td_scale_mle", which is a named
list containing an object of class "tbl_teradata".
The named list member can be referenced directly with the "$" operator
using the name: result.
Examples
# Retrieve the connection held by the current context.
con <- td_get_context()$connection

# Load the example tables used by the examples below.
loadExampleData("scalemap_example", "scale_housing")
loadExampleData("scale_example", "scale_stat", "scale_housing_test")

# Create one "tbl_teradata" object per example table.
scale_stat <- tbl(con, "scale_stat")
scale_housing <- tbl(con, "scale_housing")
scale_housing_test <- tbl(con, "scale_housing_test")
# Example 1 - Scale (normalize) the input data with the midrange method,
# leaving the arguments "intercept" and "multiplier" at their default
# values (0 and 1 respectively).
td_scale_map_out <- td_scale_map_mle(
  data = scale_housing,
  input.columns = c("price", "lotsize", "bedrooms", "bathrms", "stories")
)
# The td_scale_map_mle() output is passed directly as the "object" argument.
td_scale_out1 <- td_scale_mle(
  object = td_scale_map_out,
  data = scale_housing,
  method = "midrange",
  accumulate = "id"
)
# Example 2 - Pass a tbl_teradata as the "object" argument, set the
# "intercept" argument to "-min" (where min is the global minimum value),
# and supply a different multiplier value for each corresponding column.
td_scale_out2 <- td_scale_mle(
  object = scale_stat,
  data = scale_housing,
  method = "midrange",
  accumulate = "id",
  multiplier = c(1, 2, 3, 4, 5),
  intercept = "-min"
)
# Example 3 - Scale the input data with several methods in a single call:
# midrange, mean, maxabs, and range.
td_scale_out3 <- td_scale_mle(
  object = scale_stat,
  data = scale_housing_test,
  method = c("midrange", "mean", "maxabs", "range"),
  accumulate = "id"
)
# Example 4 - Scale the data with the Scale function (maxabs method) and
# then feed the scaled output to td_kmeans_mle(), which outputs the
# centroids of the clusters in the dataset.
loadExampleData("kmeans_example", "computers_train1")
computers_train1 <- tbl(con, "computers_train1")

# Gather the statistical information that td_scale_mle() needs.
td_scale_map_out4 <- td_scale_map_mle(
  data = computers_train1,
  input.columns = c("price", "speed", "hd", "ram"),
  miss.value = "OMIT"
)

# Produce the tbl_teradata of scaled data with the Scale function.
td_scale_out4 <- td_scale_mle(
  object = td_scale_map_out4,
  data = computers_train1,
  method = "maxabs",
  accumulate = "id"
)

# Cluster the scaled data; the scaled tbl_teradata is the "result" member.
td_kmeans_out <- td_kmeans_mle(
  data = td_scale_out4$result,
  centers = 8,
  iter.max = 10,
  threshold = 0.05
)