DistributionMatchReduce version 1.6, DistributionMatchMultiInput version 1.3
SELECT * FROM DistributionMatchReduce (
  ON DistributionMatchMultiInput (
    ON (
      SELECT RANK() OVER (PARTITION BY column [,...] ORDER BY column) AS rank, *
      FROM input_table
      WHERE column IS NOT NULL
    ) AS input PARTITION BY column [,...]
    ON (
      SELECT column [,...], COUNT(*) AS group_size
      FROM input_table
      WHERE column IS NOT NULL
      GROUP BY column [,...]
    ) AS groupstats PARTITION BY column [,...]
    USING
    ValueColumn ('value_column')
    [ Tests ('test' [,...]) ]
    Distributions ('distribution:parameters' [,...])
    [ GroupByColumns ({ 'group_by_column' | group_by_column_range }[,...]) ]
    [ MinGroupSize (min_group_size) ]
    [ NumCell (cell_size) ]
  ) AS alias_1 PARTITION BY column [,...]
) AS alias_2;
If your input table already includes a rank column, replace this clause:
ON (SELECT RANK()...
with this clause:
ON (SELECT * FROM input_table)