DistributionMatchReduce version 1.6, DistributionMatchMultiInput version 1.3
SELECT * FROM DistributionMatchReduce (
  ON DistributionMatchMultiInput (
    ON (
      SELECT COUNT(1) AS counts,
             SUM(COUNT(1)) OVER (PARTITION BY column [,...] ORDER BY column) AS rank,
             column [,...],
             column
      FROM input_table
      WHERE column IS NOT NULL
      GROUP BY column [,...], column
    ) AS input PARTITION BY ANY
    ON (
      SELECT column [,...],
             COUNT(*) AS group_size,
             AVG(column) AS mean,
             STDDEV(column) AS sd,
             MAX(column) AS maximum,
             MIN(column) AS minimum
      FROM input_table
      WHERE column IS NOT NULL
      GROUP BY column [,...]
    ) AS groupstats DIMENSION
    USING
    ValueColumn ('value_column')
    [ Tests ('test' [,...]) ]
    [ Distributions ('distribution1:parameter1' [,...]) ]
    [ GroupByColumns ({ 'group_by_column' | group_by_column_range } [,...]) ]
    MinGroupSize (minGroupSize)
    [ NumCell (cell_Size) ]
  ) AS alias_1 PARTITION BY column [,...]
  [ USING "Top" ('top') ]
) AS alias_2;
For continuous distributions, if your input table already includes a rank column, replace the first ON clause (the subquery aliased as input, which computes the rank column):
ON (SELECT RANK()...
with this clause:
ON (SELECT * FROM input_table)