DistributionMatchReduce version 1.7, DistributionMatchMultiInput version 1.4
SELECT * FROM DistributionMatchReduce (
  ON DistributionMatchMultiInput (
    ON (
      SELECT RANK() OVER (PARTITION BY col [,...] ORDER BY column) AS rank, *
      FROM input_table
      WHERE column IS NOT NULL
    ) AS InputTable PARTITION BY ANY
    ON (
      SELECT col [,...], COUNT(*) AS group_size
      FROM input_table
      WHERE column IS NOT NULL
      GROUP BY col [,...]
    ) AS GroupStatistics DIMENSION
    USING
    TargetColumn ('target_column')
    [ Tests ('test' [,...]) ]
    Distributions ('distribution:parameter' [,...])
    [ GroupByColumns ({ 'group_column' | group_column_range }[,...]) ]
    [ MinGroupSize (minGroupSize) ]
    [ NumCell (cell_size) ]
  ) AS alias_1 PARTITION BY col [,...]
) AS alias_2;
If your input table already includes a rank column, replace this clause:
ON (SELECT RANK()...
with this clause:
ON (SELECT * FROM input_table)