Malloy's rederer has flexible and powerful way of pivoting data.
Nesting first
Malloy's ability to nest queries allows you to compute two levels of queries simultaneously. The query below first groups airports by state
and then groups by the type of facility (fac_type
). For each state we see count of all the facilities.
run: duckdb.table('../data/airports.parquet') -> { group_by: state aggregate: facility_count is count() nest: by_fac_type is { group_by: fac_type aggregate: facility_count is count() } }
[ { "state": "TX", "facility_count": 1845, "by_fac_type": [ { "fac_type": "AIRPORT", "facility_count": 1389 }, { "fac_type": "HELIPORT", "facility_count": 435 }, { "fac_type": "ULTRALIGHT", "facility_count": 8 }, { "fac_type": "STOLPORT", "facility_count": 8 }, { "fac_type": "GLIDERPORT", "facility_count": 5 } ] }, { "state": "CA", "facility_count": 984, "by_fac_type": [ { "fac_type": "AIRPORT", "facility_count": 569 }, { "fac_type": "HELIPORT", "facility_count": 396 }, { "fac_type": "SEAPLANE BASE", "facility_count": 12 }, { "fac_type": "GLIDERPORT", "facility_count": 3 }, { "fac_type": "ULTRALIGHT", "facility_count": 2 }, { "fac_type": "STOLPORT", "facility_count": 2 } ] }, { "state": "IL", "facility_count": 890, "by_fac_type": [ { "fac_type": "AIRPORT", "facility_count": 625 }, { "fac_type": "HELIPORT", "facility_count": 245 }, { "fac_type": "SEAPLANE BASE", "facility_count": 8 }, { "fac_type": "ULTRALIGHT", "facility_count": 6 }, { "fac_type": "GLIDERPORT", "facility_count": 2 }, { "fac_type": "BALLOONPORT", "facility_count": 2 }, { "fac_type": "STOLPORT", "facility_count": 2 } ] }, { "state": "FL", "facility_count": 856, "by_fac_type": [ { "fac_type": "AIRPORT", "facility_count": 511 }, { "fac_type": "HELIPORT", "facility_count": 280 }, { "fac_type": "SEAPLANE BASE", "facility_count": 43 }, { "fac_type": "STOLPORT", "facility_count": 13 }, { "fac_type": "ULTRALIGHT", "facility_count": 5 }, { "fac_type": "GLIDERPORT", "facility_count": 4 } ] }, { "state": "PA", "facility_count": 804, "by_fac_type": [ { "fac_type": "AIRPORT", "facility_count": 468 }, { "fac_type": "HELIPORT", "facility_count": 307 }, { "fac_type": "ULTRALIGHT", "facility_count": 13 }, { "fac_type": "SEAPLANE BASE", "facility_count": 10 }, { "fac_type": "STOLPORT", "facility_count": 3 }, { "fac_type": "GLIDERPORT", "facility_count": 3 } ] } ]
WITH __stage0 AS ( SELECT group_set, base."state" as "state__0", CASE WHEN group_set=0 THEN COUNT(1) END as "facility_count__0", CASE WHEN group_set=1 THEN base."fac_type" END as "fac_type__1", CASE WHEN group_set=1 THEN COUNT(1) END as "facility_count__1" FROM '../data/airports.parquet' as base CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set ) as group_set GROUP BY 1,2,4 ) SELECT "state__0" as "state", MAX(CASE WHEN group_set=0 THEN "facility_count__0" END) as "facility_count", COALESCE(LIST({ "fac_type": "fac_type__1", "facility_count": "facility_count__1"} ORDER BY "facility_count__1" desc NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_fac_type" FROM __stage0 GROUP BY 1 ORDER BY 2 desc NULLS LAST
Just add '# pivot'
We can take this exact same query above (and same output) and have it simply rendered as a pivot table by adding a # pivot
tag on the nested query.
run: duckdb.table('../data/airports.parquet') -> { group_by: state aggregate: facility_count is count() # pivot nest: by_fac_type is { group_by: fac_type aggregate: facility_count is count() } }
[ { "state": "TX", "facility_count": 1845, "by_fac_type": [ { "fac_type": "AIRPORT", "facility_count": 1389 }, { "fac_type": "HELIPORT", "facility_count": 435 }, { "fac_type": "ULTRALIGHT", "facility_count": 8 }, { "fac_type": "STOLPORT", "facility_count": 8 }, { "fac_type": "GLIDERPORT", "facility_count": 5 } ] }, { "state": "CA", "facility_count": 984, "by_fac_type": [ { "fac_type": "AIRPORT", "facility_count": 569 }, { "fac_type": "HELIPORT", "facility_count": 396 }, { "fac_type": "SEAPLANE BASE", "facility_count": 12 }, { "fac_type": "GLIDERPORT", "facility_count": 3 }, { "fac_type": "ULTRALIGHT", "facility_count": 2 }, { "fac_type": "STOLPORT", "facility_count": 2 } ] }, { "state": "IL", "facility_count": 890, "by_fac_type": [ { "fac_type": "AIRPORT", "facility_count": 625 }, { "fac_type": "HELIPORT", "facility_count": 245 }, { "fac_type": "SEAPLANE BASE", "facility_count": 8 }, { "fac_type": "ULTRALIGHT", "facility_count": 6 }, { "fac_type": "BALLOONPORT", "facility_count": 2 }, { "fac_type": "GLIDERPORT", "facility_count": 2 }, { "fac_type": "STOLPORT", "facility_count": 2 } ] }, { "state": "FL", "facility_count": 856, "by_fac_type": [ { "fac_type": "AIRPORT", "facility_count": 511 }, { "fac_type": "HELIPORT", "facility_count": 280 }, { "fac_type": "SEAPLANE BASE", "facility_count": 43 }, { "fac_type": "STOLPORT", "facility_count": 13 }, { "fac_type": "ULTRALIGHT", "facility_count": 5 }, { "fac_type": "GLIDERPORT", "facility_count": 4 } ] }, { "state": "PA", "facility_count": 804, "by_fac_type": [ { "fac_type": "AIRPORT", "facility_count": 468 }, { "fac_type": "HELIPORT", "facility_count": 307 }, { "fac_type": "ULTRALIGHT", "facility_count": 13 }, { "fac_type": "SEAPLANE BASE", "facility_count": 10 }, { "fac_type": "STOLPORT", "facility_count": 3 }, { "fac_type": "GLIDERPORT", "facility_count": 3 } ] } ]
WITH __stage0 AS ( SELECT group_set, base."state" as "state__0", CASE WHEN group_set=0 THEN COUNT(1) END as "facility_count__0", CASE WHEN group_set=1 THEN base."fac_type" END as "fac_type__1", CASE WHEN group_set=1 THEN COUNT(1) END as "facility_count__1" FROM '../data/airports.parquet' as base CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set ) as group_set GROUP BY 1,2,4 ) SELECT "state__0" as "state", MAX(CASE WHEN group_set=0 THEN "facility_count__0" END) as "facility_count", COALESCE(LIST({ "fac_type": "fac_type__1", "facility_count": "facility_count__1"} ORDER BY "facility_count__1" desc NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_fac_type" FROM __stage0 GROUP BY 1 ORDER BY 2 desc NULLS LAST
Pivots are really powerful
The model below is used in all the following examples.
source: flights is duckdb.table('../data/flights.parquet') extend { join_one: carriers is duckdb.table('../data/carriers.parquet') on carrier = carriers.code join_one: dest is duckdb.table('../data/airports.parquet') on destination = dest.code join_one: orig is duckdb.table('../data/airports.parquet') on destination = orig.code measure: flight_count is count() total_distance is distance.sum() }
The Classic Pivot
A classic data pivot is data is dimensionalized by two attributes the data can be rendered with one dimension along the x-axis and one dimension on the y-axis with the aggregate computations making up the center of the table. The cross section of the data allows for easy comparison. In Malloy, pivots columns are nested queries.
Carriers by FAA Region
run: flights -> { group_by: carriers.nickname # pivot nest: by_faa_region is { group_by: orig.faa_region aggregate: flight_count } }
[ { "nickname": "ATA", "by_faa_region": [ { "faa_region": "AGL", "flight_count": 1672 }, { "faa_region": "AEA", "flight_count": 480 }, { "faa_region": "ASO", "flight_count": 358 }, { "faa_region": "AWP", "flight_count": 177 }, { "faa_region": "ANM", "flight_count": 149 }, { "faa_region": "ASW", "flight_count": 132 }, { "faa_region": "ANE", "flight_count": 65 } ] }, { "nickname": "Alaska", "by_faa_region": [ { "faa_region": "ANM", "flight_count": 4345 }, { "faa_region": "AWP", "flight_count": 3604 }, { "faa_region": "AAL", "flight_count": 503 }, { "faa_region": "ASW", "flight_count": 1 } ] }, { "nickname": "America West", "by_faa_region": [ { "faa_region": "AWP", "flight_count": 7224 }, { "faa_region": "ASW", "flight_count": 912 }, { "faa_region": "ANM", "flight_count": 868 }, { "faa_region": "ACE", "flight_count": 450 }, { "faa_region": "AGL", "flight_count": 289 }, { "faa_region": "ASO", "flight_count": 7 } ] }, { "nickname": "American", "by_faa_region": [ { "faa_region": "ASW", "flight_count": 11319 }, { "faa_region": "AWP", "flight_count": 6026 }, { "faa_region": "AGL", "flight_count": 5946 }, { "faa_region": "AEA", "flight_count": 4200 }, { "faa_region": "ASO", "flight_count": 3530 }, { "faa_region": "ANM", "flight_count": 1469 }, { "faa_region": "ACE", "flight_count": 1130 }, { "faa_region": "ANE", "flight_count": 956 }, { "faa_region": "AAL", "flight_count": 1 } ] }, { "nickname": "American Eagle", "by_faa_region": [ { "faa_region": "ASW", "flight_count": 11824 }, { "faa_region": "AGL", "flight_count": 1474 }, { "faa_region": "AEA", "flight_count": 1132 }, { "faa_region": "ANE", "flight_count": 665 }, { "faa_region": "ACE", "flight_count": 470 }, { "faa_region": "ASO", "flight_count": 244 }, { "faa_region": "AWP", "flight_count": 43 }, { "faa_region": "ANM", "flight_count": 17 } ] } ]
WITH __stage0 AS ( SELECT group_set, carriers_0."nickname" as "nickname__0", CASE WHEN group_set=1 THEN orig_0."faa_region" END as "faa_region__1", CASE WHEN group_set=1 THEN COUNT(1) END as "flight_count__1" FROM '../data/flights.parquet' as base LEFT JOIN '../data/carriers.parquet' AS carriers_0 ON base."carrier"=carriers_0."code" LEFT JOIN '../data/airports.parquet' AS orig_0 ON base."destination"=orig_0."code" CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set ) as group_set GROUP BY 1,2,3 ) SELECT "nickname__0" as "nickname", COALESCE(LIST({ "faa_region": "faa_region__1", "flight_count": "flight_count__1"} ORDER BY "flight_count__1" desc NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_faa_region" FROM __stage0 GROUP BY 1 ORDER BY 1 asc NULLS LAST
The Pivot Filtered
You can control which dimension are shown in the pivot with a filter (and their order)
run: flights -> { group_by: carriers.nickname # pivot nest: by_state is { where: orig.state = 'CA' | 'NY' | 'WA' // only show these states group_by: orig.state aggregate: flight_count order_by: state // sort order of the pivoted columns } }
[ { "nickname": "ATA", "by_state": [ { "state": "CA", "flight_count": 78 }, { "state": "NY", "flight_count": 208 }, { "state": "WA", "flight_count": 27 } ] }, { "nickname": "Alaska", "by_state": [ { "state": "CA", "flight_count": 2633 }, { "state": "WA", "flight_count": 3257 } ] }, { "nickname": "America West", "by_state": [ { "state": "CA", "flight_count": 2038 }, { "state": "WA", "flight_count": 216 } ] }, { "nickname": "American", "by_state": [ { "state": "CA", "flight_count": 4681 }, { "state": "NY", "flight_count": 2168 }, { "state": "WA", "flight_count": 495 } ] }, { "nickname": "American Eagle", "by_state": [ { "state": "CA", "flight_count": 43 }, { "state": "NY", "flight_count": 961 } ] } ]
WITH __stage0 AS ( SELECT group_set, carriers_0."nickname" as "nickname__0", CASE WHEN group_set=1 THEN orig_0."state" END as "state__1", CASE WHEN group_set=1 THEN COUNT(1) END as "flight_count__1" FROM '../data/flights.parquet' as base LEFT JOIN '../data/airports.parquet' AS orig_0 ON base."destination"=orig_0."code" LEFT JOIN '../data/carriers.parquet' AS carriers_0 ON base."carrier"=carriers_0."code" CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set ) as group_set WHERE (group_set NOT IN (1) OR (group_set IN (1) AND orig_0."state" IN ('CA','NY','WA'))) GROUP BY 1,2,3 ) SELECT "nickname__0" as "nickname", COALESCE(LIST({ "state": "state__1", "flight_count": "flight_count__1"} ORDER BY "state__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_state" FROM __stage0 GROUP BY 1 ORDER BY 1 asc NULLS LAST
The Pivot with multiple aggreagtes
Pivots can have multiple aggregates. In this case we show flight_count
and total_distance
for each of the states.
run: flights -> { group_by: carriers.nickname # pivot nest: by_state is { where: orig.state = 'CA' | 'NY' | 'WA' // only show these states group_by: orig.state aggregate: flight_count total_distance order_by: state // sort order of the pivoted columns } }
[ { "nickname": "ATA", "by_state": [ { "state": "CA", "flight_count": 78, "total_distance": 144910 }, { "state": "NY", "flight_count": 208, "total_distance": 149936 }, { "state": "WA", "flight_count": 27, "total_distance": 46791 } ] }, { "nickname": "Alaska", "by_state": [ { "state": "CA", "flight_count": 2633, "total_distance": 2111255 }, { "state": "WA", "flight_count": 3257, "total_distance": 2686505 } ] }, { "nickname": "America West", "by_state": [ { "state": "CA", "flight_count": 2038, "total_distance": 847343 }, { "state": "WA", "flight_count": 216, "total_distance": 216113 } ] }, { "nickname": "American", "by_state": [ { "state": "CA", "flight_count": 4681, "total_distance": 7266808 }, { "state": "NY", "flight_count": 2168, "total_distance": 3183109 }, { "state": "WA", "flight_count": 495, "total_distance": 827090 } ] }, { "nickname": "American Eagle", "by_state": [ { "state": "CA", "flight_count": 43, "total_distance": 57945 }, { "state": "NY", "flight_count": 961, "total_distance": 324058 } ] } ]
WITH __stage0 AS ( SELECT group_set, carriers_0."nickname" as "nickname__0", CASE WHEN group_set=1 THEN orig_0."state" END as "state__1", CASE WHEN group_set=1 THEN COUNT(1) END as "flight_count__1", CASE WHEN group_set=1 THEN COALESCE(SUM(base."distance"),0) END as "total_distance__1" FROM '../data/flights.parquet' as base LEFT JOIN '../data/airports.parquet' AS orig_0 ON base."destination"=orig_0."code" LEFT JOIN '../data/carriers.parquet' AS carriers_0 ON base."carrier"=carriers_0."code" CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set ) as group_set WHERE (group_set NOT IN (1) OR (group_set IN (1) AND orig_0."state" IN ('CA','NY','WA'))) GROUP BY 1,2,3 ) SELECT "nickname__0" as "nickname", COALESCE(LIST({ "state": "state__1", "flight_count": "flight_count__1", "total_distance": "total_distance__1"} ORDER BY "state__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_state" FROM __stage0 GROUP BY 1 ORDER BY 1 asc NULLS LAST
Aggregates outside the pivot (and row ordering)
Malloy allows you to intermix unpivoted data along with pivoted data through nesting. Since pivots are nests, any aggregate outside the nest is just shown normally.
run: flights -> { group_by: carriers.nickname aggregate: total_flights is flight_count // outside the pivot total_distance # pivot nest: by_state is { where: orig.state = 'CA' | 'NY' | 'WA' // only show these states group_by: orig.state aggregate: flight_count order_by: state // sort order of the pivoted columns } }
[ { "nickname": "Southwest", "total_flights": 88751, "total_distance": 54619152, "by_state": [ { "state": "CA", "flight_count": 19764 }, { "state": "NY", "flight_count": 1898 }, { "state": "WA", "flight_count": 1902 } ] }, { "nickname": "USAir", "total_flights": 37683, "total_distance": 23721642, "by_state": [ { "state": "CA", "flight_count": 541 }, { "state": "NY", "flight_count": 2791 }, { "state": "WA", "flight_count": 185 } ] }, { "nickname": "American", "total_flights": 34577, "total_distance": 37684885, "by_state": [ { "state": "CA", "flight_count": 4681 }, { "state": "NY", "flight_count": 2168 }, { "state": "WA", "flight_count": 495 } ] }, { "nickname": "Northwest", "total_flights": 33580, "total_distance": 33376503, "by_state": [ { "state": "CA", "flight_count": 2203 }, { "state": "NY", "flight_count": 775 }, { "state": "WA", "flight_count": 850 } ] }, { "nickname": "United", "total_flights": 32757, "total_distance": 38882934, "by_state": [ { "state": "CA", "flight_count": 6873 }, { "state": "NY", "flight_count": 576 }, { "state": "WA", "flight_count": 742 } ] } ]
WITH __stage0 AS ( SELECT group_set, carriers_0."nickname" as "nickname__0", (CASE WHEN group_set=0 THEN COUNT(1) END) as "total_flights__0", CASE WHEN group_set=0 THEN COALESCE(SUM(base."distance"),0) END as "total_distance__0", CASE WHEN group_set=1 THEN orig_0."state" END as "state__1", CASE WHEN group_set=1 THEN COUNT(1) END as "flight_count__1" FROM '../data/flights.parquet' as base LEFT JOIN '../data/airports.parquet' AS orig_0 ON base."destination"=orig_0."code" LEFT JOIN '../data/carriers.parquet' AS carriers_0 ON base."carrier"=carriers_0."code" CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set ) as group_set WHERE (group_set NOT IN (1) OR (group_set IN (1) AND orig_0."state" IN ('CA','NY','WA'))) GROUP BY 1,2,5 ) SELECT "nickname__0" as "nickname", MAX(CASE WHEN group_set=0 THEN "total_flights__0" END) as "total_flights", MAX(CASE WHEN group_set=0 THEN "total_distance__0" END) as "total_distance", COALESCE(LIST({ "state": "state__1", "flight_count": "flight_count__1"} ORDER BY "state__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_state" FROM __stage0 GROUP BY 1 ORDER BY 2 desc NULLS LAST
Multiple pivots in the same table
Malloy allows you to intermix unpivoted data along with pivoted data through nesting. Since pivots are nests, any aggregate outside the nest is just shown normally.
run: flights -> { group_by: carriers.nickname aggregate: total_flights is flight_count // outside the pivot # pivot nest: by_state is { where: orig.state = 'CA' | 'NY' | 'WA' // only show these states group_by: orig.state aggregate: flight_count order_by: state // sort order of the pivoted columns } # pivot nest: by_year is { where: dep_time.year > @2003 group_by: dep_year is dep_time.year aggregate: flight_count } }
[ { "nickname": "Southwest", "total_flights": 88751, "by_state": [ { "state": "CA", "flight_count": 19764 }, { "state": "NY", "flight_count": 1898 }, { "state": "WA", "flight_count": 1902 } ], "by_year": [ { "dep_year": "2005-01-01T00:00:00.000Z", "flight_count": 17549 }, { "dep_year": "2004-01-01T00:00:00.000Z", "flight_count": 14640 } ] }, { "nickname": "USAir", "total_flights": 37683, "by_state": [ { "state": "CA", "flight_count": 541 }, { "state": "NY", "flight_count": 2791 }, { "state": "WA", "flight_count": 185 } ], "by_year": [ { "dep_year": "2005-01-01T00:00:00.000Z", "flight_count": 7446 }, { "dep_year": "2004-01-01T00:00:00.000Z", "flight_count": 5388 } ] }, { "nickname": "American", "total_flights": 34577, "by_state": [ { "state": "CA", "flight_count": 4681 }, { "state": "NY", "flight_count": 2168 }, { "state": "WA", "flight_count": 495 } ], "by_year": [ { "dep_year": "2005-01-01T00:00:00.000Z", "flight_count": 5543 }, { "dep_year": "2004-01-01T00:00:00.000Z", "flight_count": 5604 } ] }, { "nickname": "Northwest", "total_flights": 33580, "by_state": [ { "state": "CA", "flight_count": 2203 }, { "state": "NY", "flight_count": 775 }, { "state": "WA", "flight_count": 850 } ], "by_year": [ { "dep_year": "2005-01-01T00:00:00.000Z", "flight_count": 5869 }, { "dep_year": "2004-01-01T00:00:00.000Z", "flight_count": 6306 } ] }, { "nickname": "United", "total_flights": 32757, "by_state": [ { "state": "CA", "flight_count": 6873 }, { "state": "NY", "flight_count": 576 }, { "state": "WA", "flight_count": 742 } ], "by_year": [ { "dep_year": "2005-01-01T00:00:00.000Z", "flight_count": 6876 }, { "dep_year": "2004-01-01T00:00:00.000Z", "flight_count": 6817 } ] } ]
WITH __stage0 AS ( SELECT group_set, carriers_0."nickname" as "nickname__0", (CASE WHEN group_set=0 THEN COUNT(1) END) as "total_flights__0", CASE WHEN group_set=1 THEN orig_0."state" END as "state__1", CASE WHEN group_set=1 THEN COUNT(1) END as "flight_count__1", CASE WHEN group_set=2 THEN DATE_TRUNC('year', base."dep_time") END as "dep_year__2", CASE WHEN group_set=2 THEN COUNT(1) END as "flight_count__2" FROM '../data/flights.parquet' as base LEFT JOIN '../data/airports.parquet' AS orig_0 ON base."destination"=orig_0."code" LEFT JOIN '../data/carriers.parquet' AS carriers_0 ON base."carrier"=carriers_0."code" CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,2,1)) as group_set ) as group_set WHERE ((group_set NOT IN (1) OR (group_set IN (1) AND orig_0."state" IN ('CA','NY','WA')))) AND ((group_set NOT IN (2) OR (group_set IN (2) AND DATE_TRUNC('year', base."dep_time")>=TIMESTAMP '2004-01-01 00:00:00'))) GROUP BY 1,2,4,6 ) SELECT "nickname__0" as "nickname", MAX(CASE WHEN group_set=0 THEN "total_flights__0" END) as "total_flights", COALESCE(LIST({ "state": "state__1", "flight_count": "flight_count__1"} ORDER BY "state__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_state", COALESCE(LIST({ "dep_year": "dep_year__2", "flight_count": "flight_count__2"} ORDER BY "dep_year__2" desc NULLS LAST) FILTER (WHERE group_set=2),[]) as "by_year" FROM __stage0 GROUP BY 1 ORDER BY 2 desc NULLS LAST
Ordering by a column in the pivot
Malloy sorts by the first aggregate column in finds, generally. The # hidden
tag allows you to not show a column in a result. By adding a measure filtered by the column we are interested in, we can sort the entire table on a column in the pivot.
Rows sorted by 'CA' flights
run: flights -> { group_by: carriers.nickname # hidden aggregate: ca_count is flight_count { where: orig.state = 'CA' } # pivot nest: by_state is { where: orig.state = 'CA' | 'NY' | 'WA' // only show these states group_by: orig.state aggregate: flight_count order_by: state // sort order of the pivoted columns } }
[ { "nickname": "Southwest", "ca_count": 19764, "by_state": [ { "state": "CA", "flight_count": 19764 }, { "state": "NY", "flight_count": 1898 }, { "state": "WA", "flight_count": 1902 } ] }, { "nickname": "United", "ca_count": 6873, "by_state": [ { "state": "CA", "flight_count": 6873 }, { "state": "NY", "flight_count": 576 }, { "state": "WA", "flight_count": 742 } ] }, { "nickname": "American", "ca_count": 4681, "by_state": [ { "state": "CA", "flight_count": 4681 }, { "state": "NY", "flight_count": 2168 }, { "state": "WA", "flight_count": 495 } ] }, { "nickname": "Alaska", "ca_count": 2633, "by_state": [ { "state": "CA", "flight_count": 2633 }, { "state": "WA", "flight_count": 3257 } ] }, { "nickname": "Northwest", "ca_count": 2203, "by_state": [ { "state": "CA", "flight_count": 2203 }, { "state": "NY", "flight_count": 775 }, { "state": "WA", "flight_count": 850 } ] } ]
WITH __stage0 AS ( SELECT group_set, carriers_0."nickname" as "nickname__0", (CASE WHEN group_set=0 THEN COUNT(CASE WHEN orig_0."state"='CA' THEN 1 END) END) as "ca_count__0", CASE WHEN group_set=1 THEN orig_0."state" END as "state__1", CASE WHEN group_set=1 THEN COUNT(1) END as "flight_count__1" FROM '../data/flights.parquet' as base LEFT JOIN '../data/airports.parquet' AS orig_0 ON base."destination"=orig_0."code" LEFT JOIN '../data/carriers.parquet' AS carriers_0 ON base."carrier"=carriers_0."code" CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set ) as group_set WHERE (group_set NOT IN (1) OR (group_set IN (1) AND orig_0."state" IN ('CA','NY','WA'))) GROUP BY 1,2,4 ) SELECT "nickname__0" as "nickname", MAX(CASE WHEN group_set=0 THEN "ca_count__0" END) as "ca_count", COALESCE(LIST({ "state": "state__1", "flight_count": "flight_count__1"} ORDER BY "state__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_state" FROM __stage0 GROUP BY 1 ORDER BY 2 desc NULLS LAST
Malloy Renderer uses the metadata from the query to decide which columns to pivot (dimensions are pivoted, aggreates are not). In multistage queries this information is incorrect. You can manually specify this information with a 'dimensions' parameter on the pivot tag.
run: flights -> { group_by: carriers.nickname aggregate: total_flights is flight_count # pivot { dimensions=["dep_year"] } nest: by_year is { group_by: dep_year is dep_time.year aggregate: flight_count calculate: growth is (flight_count - lag(flight_count, 1)) / flight_count order_by: dep_year } -> { where: dep_year > @2003 select: dep_year flight_count # percent growth } }
[ { "nickname": "Southwest", "total_flights": 88751, "by_year": [ { "dep_year": "2004-01-01T00:00:00.000Z", "flight_count": 14640, "growth": 0.023224043715846996 }, { "dep_year": "2005-01-01T00:00:00.000Z", "flight_count": 17549, "growth": 0.16576443102171065 } ] }, { "nickname": "USAir", "total_flights": 37683, "by_year": [ { "dep_year": "2004-01-01T00:00:00.000Z", "flight_count": 5388, "growth": 0.19803266518188567 }, { "dep_year": "2005-01-01T00:00:00.000Z", "flight_count": 7446, "growth": 0.2763900080580177 } ] }, { "nickname": "American", "total_flights": 34577, "by_year": [ { "dep_year": "2004-01-01T00:00:00.000Z", "flight_count": 5604, "growth": 0.006780870806566738 }, { "dep_year": "2005-01-01T00:00:00.000Z", "flight_count": 5543, "growth": -0.011004871008479163 } ] }, { "nickname": "Northwest", "total_flights": 33580, "by_year": [ { "dep_year": "2004-01-01T00:00:00.000Z", "flight_count": 6306, "growth": 0.1774500475737393 }, { "dep_year": "2005-01-01T00:00:00.000Z", "flight_count": 5869, "growth": -0.07445902197989436 } ] }, { "nickname": "United", "total_flights": 32757, "by_year": [ { "dep_year": "2004-01-01T00:00:00.000Z", "flight_count": 6817, "growth": 0.10987237787883233 }, { "dep_year": "2005-01-01T00:00:00.000Z", "flight_count": 6876, "growth": 0.008580570098894706 } ] } ]
WITH __stage0 AS ( SELECT group_set, carriers_0."nickname" as "nickname__0", (CASE WHEN group_set=0 THEN COUNT(1) END) as "total_flights__0", CASE WHEN group_set=1 THEN DATE_TRUNC('year', base."dep_time") END as "dep_year__1", CASE WHEN group_set=1 THEN COUNT(1) END as "flight_count__1", (((CASE WHEN group_set=1 THEN COUNT(1) END)-(CASE WHEN group_set=1 THEN LAG((CASE WHEN group_set=1 THEN COUNT(1) END),1) OVER(PARTITION BY group_set, carriers_0."nickname" ORDER BY CASE WHEN group_set=1 THEN DATE_TRUNC('year', base."dep_time") END ASC NULLS LAST ) END)))*1.0/(CASE WHEN group_set=1 THEN COUNT(1) END) as "growth__1" FROM '../data/flights.parquet' as base LEFT JOIN '../data/carriers.parquet' AS carriers_0 ON base."carrier"=carriers_0."code" CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set ) as group_set GROUP BY 1,2,4 ) SELECT "nickname__0" as "nickname", MAX(CASE WHEN group_set=0 THEN "total_flights__0" END) as "total_flights", (WITH __stage0 AS ( SELECT base."dep_year" as "dep_year", base."flight_count" as "flight_count", base."growth" as "growth" FROM (SELECT UNNEST(COALESCE(LIST({ "dep_year": "dep_year__1", "flight_count": "flight_count__1", "growth": "growth__1"} ORDER BY "dep_year__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[])) as base) as base WHERE base."dep_year">=TIMESTAMP '2004-01-01 00:00:00' ) SELECT LIST(STRUCT_PACK("dep_year","flight_count","growth")) FROM __stage0 ) as "by_year" FROM __stage0 GROUP BY 1 ORDER BY 2 desc NULLS LAST