Description
tdFillNa()
allows user to perform missing value/null replacement
transformations.
Note:
Object of this class is passed to "fillna" argument of
td_transform_valib()
.
Usage
tdFillNa(columns=NULL, datatype=NULL, style="mean", value=NULL)
Arguments
columns |
Optional Argument. | ||||||||||||||||||||||||||||||||||||
datatype |
Optional Argument.
Notes:
Examples:
Types: character | ||||||||||||||||||||||||||||||||||||
style |
Optional Argument. | ||||||||||||||||||||||||||||||||||||
value |
Optional Argument. Required when "style" is 'literal' or
'imputed'.
Types: tbl_teradata, logical, integer, numeric, character |
Value
An object of tdFillNa class.
Examples
# Notes:
# 1. To run any transformation, user needs to use td_transform_valib()
# function.
# 2. To do so set option 'val.install.location' to the database name
# where Vantage analytic library functions are installed.
# 3. Datasets used in these examples can be loaded using Vantage Analytic
# Library installer.
# Get the current context/connection
con <- td_get_context()$connection
# Set the option 'val.install.location'.
options(val.install.location = "SYSLIB")
# Create object(s) of class "tbl_teradata".
sales <- tbl(con, "sales")
sales
employee_info <- tbl(con, "employee_info")
employee_info
# Example 1: Replace missing values in columns 'Jan' and 'Mar', with
# a literal value 0. Output columns are named as 'january' and
# 'march' respectively.
# Perform the missing value transformation using
# td_transform_valib() function from Vantage Analytic Library.
# Create tdFillNa object.
fillna_literal <- tdFillNa(style="literal", value=0,
columns=list("Jan"="january", "Mar"="march"))
# Perform the missing value transformation using td_transform_valib().
obj <- td_transform_valib(data=sales, fillna=fillna_literal,
key.columns="accounts")
obj$result
# Example 2: Replace missing values in column 'Jan' with 'median' value from
# that column. Output column produced in the output is named as
# 'Jan2'.
# Perform the missing value transformation using
# td_transform_valib() function from Vantage Analytic Library.
# Create tdFillNa object.
fillna_median <- tdFillNa(style="median", columns=list("Jan"="Jan2"))
# Perform the missing value transformation using td_transform_valib().
obj <- td_transform_valib(data=sales, fillna=fillna_median,
key.columns="accounts")
obj$result
# Example 3: Replace missing values in column 'Apr' with a median value
# without mean from that column.
# Perform the missing value transformation using
# td_transform_valib() function from Vantage Analytic Library.
fillna_mwm <- tdFillNa(style="median_wo_mean", columns="Apr")
# Perform the missing value transformation using td_transform_valib().
obj <- td_transform_valib(data=sales, fillna=fillna_mwm,
key.columns="accounts")
obj$result
# Example 4: Replace missing values in column 'Apr' with 'mode' value from
# that column. Output column produced in the output is named as
# 'Apr2000'.
# Perform the missing value transformation using
# td_transform_valib() function from Vantage Analytic Library.
fillna_mode <- tdFillNa(style="mode", columns=list("Apr"="Apr2000"))
# Perform the missing value transformation using td_transform_valib().
obj <- td_transform_valib(data=sales, fillna=fillna_mode,
key.columns="accounts")
obj$result
# Example 5: Replace missing values in columns 'masters' and 'programming'
# using 'imputed' style.
# Perform the missing value transformation using
# td_transform_valib() function from Vantage Analytic Library.
# Create object(s) of class "tbl_teradata".
admissions_train_nulls <- tbl(con, "admissions_train_nulls")
admissions_train_nulls
admissions_train <- tbl(con, "admissions_train")
admissions_train
# Replace NULL values in the columns 'masters' and 'programming'
# in admissions_train_nulls dataframe with the values in the corresponding
# column's values in admissions_train dataframe.
fillna_imputed <- tdFillNa(style="imputed", value=admissions_train,
columns=c("masters", "programming"))
# Perform the missing value transformation using td_transform_valib().
obj <- td_transform_valib(data=admissions_train_nulls, fillna=fillna_imputed,
key.columns="id")
obj$result
# Example 6: This example shows how one can operate on date and character
# columns. Example also showcases using multiple missing value
# treatment techniques in one single call for variable
# transformation.
# Create object(s) of class "tbl_teradata".
employee_info <- tbl(con, "employee_info")
# Using literal style for missing value treatment on a date type
# column 'joined_date.'
fillna_1 <- tdFillNa(style="literal", value="DATE 1995-12-23",
columns=list("joined_date"="date1"))
# Using literal style for missing value treatment on a character type
# column 'first_name'. Replace missing values with 'FNU', i.e.,
# First Name Unknown.
fillna_2 <- tdFillNa(style="literal", value="FNU",
columns=list("first_name"="char1"))
# Using mean value for missing value treatment on a date type
# column 'joined_date'.
fillna_3 <- tdFillNa(style="mean", columns=list("joined_date"="date2"))
# Using median value for missing value treatment on a date type
# column 'joined_date'.
fillna_4 <- tdFillNa(style="median", columns=list("joined_date"="date2A"))
# Using median value without mean for missing value treatment on a date type
# column 'joined_date'.
fillna_5 <- tdFillNa(style="median_wo_mean",
columns=list("joined_date"="date3"))
# Using mode value for missing value treatment on a date type
# column 'joined_date'.
fillna_6 <- tdFillNa(style="mode", columns=list("joined_date"="date4"))
# Using median value without mean for missing value treatment on a character
# type column 'first_name'.
fillna_7 <- tdFillNa(style="median_wo_mean",
columns=list("first_name"="char2"))
# Using mode value for missing value treatment on a character type column
# 'first_name'.
fillna_8 <- tdFillNa(style="mode", columns=list("first_name"="char3"))
# Perform the missing value transformations using td_transform_valib().
obj <- td_transform_valib(data=employee_info,
fillna=c(fillna_1, fillna_2, fillna_3, fillna_4,
fillna_5, fillna_6, fillna_7, fillna_8),
key.columns="employee_no")
obj$result