Pivoting Results

VSCode

Malloy's rederer has flexible and powerful way of pivoting data.

Nesting first

Malloy's ability to nest queries allows you to compute two levels of queries simultaneously. The query below first groups airports by state and then groups by the type of facility (fac_type). For each state we see count of all the facilities.

run: duckdb.table('../data/airports.parquet') -> {
  group_by: state
  aggregate: facility_count is count()
  nest: by_fac_type is  {
    group_by: fac_type
    aggregate: facility_count is count()
  }
}

QUERY RESULTS

[
  {
    "state": "TX",
    "facility_count": 1845,
    "by_fac_type": [
      {
        "fac_type": "AIRPORT",
        "facility_count": 1389
      },
      {
        "fac_type": "HELIPORT",
        "facility_count": 435
      },
      {
        "fac_type": "STOLPORT",
        "facility_count": 8
      },
      {
        "fac_type": "ULTRALIGHT",
        "facility_count": 8
      },
      {
        "fac_type": "GLIDERPORT",
        "facility_count": 5
      }
    ]
  },
  {
    "state": "CA",
    "facility_count": 984,
    "by_fac_type": [
      {
        "fac_type": "AIRPORT",
        "facility_count": 569
      },
      {
        "fac_type": "HELIPORT",
        "facility_count": 396
      },
      {
        "fac_type": "SEAPLANE BASE",
        "facility_count": 12
      },
      {
        "fac_type": "GLIDERPORT",
        "facility_count": 3
      },
      {
        "fac_type": "STOLPORT",
        "facility_count": 2
      },
      {
        "fac_type": "ULTRALIGHT",
        "facility_count": 2
      }
    ]
  },
  {
    "state": "IL",
    "facility_count": 890,
    "by_fac_type": [
      {
        "fac_type": "AIRPORT",
        "facility_count": 625
      },
      {
        "fac_type": "HELIPORT",
        "facility_count": 245
      },
      {
        "fac_type": "SEAPLANE BASE",
        "facility_count": 8
      },
      {
        "fac_type": "ULTRALIGHT",
        "facility_count": 6
      },
      {
        "fac_type": "BALLOONPORT",
        "facility_count": 2
      },
      {
        "fac_type": "GLIDERPORT",
        "facility_count": 2
      },
      {
        "fac_type": "STOLPORT",
        "facility_count": 2
      }
    ]
  },
  {
    "state": "FL",
    "facility_count": 856,
    "by_fac_type": [
      {
        "fac_type": "AIRPORT",
        "facility_count": 511
      },
      {
        "fac_type": "HELIPORT",
        "facility_count": 280
      },
      {
        "fac_type": "SEAPLANE BASE",
        "facility_count": 43
      },
      {
        "fac_type": "STOLPORT",
        "facility_count": 13
      },
      {
        "fac_type": "ULTRALIGHT",
        "facility_count": 5
      },
      {
        "fac_type": "GLIDERPORT",
        "facility_count": 4
      }
    ]
  },
  {
    "state": "PA",
    "facility_count": 804,
    "by_fac_type": [
      {
        "fac_type": "AIRPORT",
        "facility_count": 468
      },
      {
        "fac_type": "HELIPORT",
        "facility_count": 307
      },
      {
        "fac_type": "ULTRALIGHT",
        "facility_count": 13
      },
      {
        "fac_type": "SEAPLANE BASE",
        "facility_count": 10
      },
      {
        "fac_type": "GLIDERPORT",
        "facility_count": 3
      },
      {
        "fac_type": "STOLPORT",
        "facility_count": 3
      }
    ]
  }
]

WITH __stage0 AS (
  SELECT
    group_set,
    base."state" as "state__0",
    CASE WHEN group_set=0 THEN
      COUNT(1)
      END as "facility_count__0",
    CASE WHEN group_set=1 THEN
      base."fac_type"
      END as "fac_type__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "facility_count__1"
  FROM '../data/airports.parquet' as base
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  GROUP BY 1,2,4
)
SELECT
  "state__0" as "state",
  MAX(CASE WHEN group_set=0 THEN "facility_count__0" END) as "facility_count",
  COALESCE(LIST({
    "fac_type": "fac_type__1", 
    "facility_count": "facility_count__1"}  ORDER BY  "facility_count__1" desc NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_fac_type"
FROM __stage0
GROUP BY 1
ORDER BY 2 desc NULLS LAST

Just add '# pivot'

We can take this exact same query above (and same output) and have it simply rendered as a pivot table by adding a # pivot tag on the nested query.

run: duckdb.table('../data/airports.parquet') -> {
  group_by: state
  aggregate: facility_count is count()
  # pivot
  nest: by_fac_type is  {
    group_by: fac_type
    aggregate: facility_count is count()
  }
}

QUERY RESULTS

[
  {
    "state": "TX",
    "facility_count": 1845,
    "by_fac_type": [
      {
        "fac_type": "AIRPORT",
        "facility_count": 1389
      },
      {
        "fac_type": "HELIPORT",
        "facility_count": 435
      },
      {
        "fac_type": "ULTRALIGHT",
        "facility_count": 8
      },
      {
        "fac_type": "STOLPORT",
        "facility_count": 8
      },
      {
        "fac_type": "GLIDERPORT",
        "facility_count": 5
      }
    ]
  },
  {
    "state": "CA",
    "facility_count": 984,
    "by_fac_type": [
      {
        "fac_type": "AIRPORT",
        "facility_count": 569
      },
      {
        "fac_type": "HELIPORT",
        "facility_count": 396
      },
      {
        "fac_type": "SEAPLANE BASE",
        "facility_count": 12
      },
      {
        "fac_type": "GLIDERPORT",
        "facility_count": 3
      },
      {
        "fac_type": "STOLPORT",
        "facility_count": 2
      },
      {
        "fac_type": "ULTRALIGHT",
        "facility_count": 2
      }
    ]
  },
  {
    "state": "IL",
    "facility_count": 890,
    "by_fac_type": [
      {
        "fac_type": "AIRPORT",
        "facility_count": 625
      },
      {
        "fac_type": "HELIPORT",
        "facility_count": 245
      },
      {
        "fac_type": "SEAPLANE BASE",
        "facility_count": 8
      },
      {
        "fac_type": "ULTRALIGHT",
        "facility_count": 6
      },
      {
        "fac_type": "BALLOONPORT",
        "facility_count": 2
      },
      {
        "fac_type": "GLIDERPORT",
        "facility_count": 2
      },
      {
        "fac_type": "STOLPORT",
        "facility_count": 2
      }
    ]
  },
  {
    "state": "FL",
    "facility_count": 856,
    "by_fac_type": [
      {
        "fac_type": "AIRPORT",
        "facility_count": 511
      },
      {
        "fac_type": "HELIPORT",
        "facility_count": 280
      },
      {
        "fac_type": "SEAPLANE BASE",
        "facility_count": 43
      },
      {
        "fac_type": "STOLPORT",
        "facility_count": 13
      },
      {
        "fac_type": "ULTRALIGHT",
        "facility_count": 5
      },
      {
        "fac_type": "GLIDERPORT",
        "facility_count": 4
      }
    ]
  },
  {
    "state": "PA",
    "facility_count": 804,
    "by_fac_type": [
      {
        "fac_type": "AIRPORT",
        "facility_count": 468
      },
      {
        "fac_type": "HELIPORT",
        "facility_count": 307
      },
      {
        "fac_type": "ULTRALIGHT",
        "facility_count": 13
      },
      {
        "fac_type": "SEAPLANE BASE",
        "facility_count": 10
      },
      {
        "fac_type": "GLIDERPORT",
        "facility_count": 3
      },
      {
        "fac_type": "STOLPORT",
        "facility_count": 3
      }
    ]
  }
]

WITH __stage0 AS (
  SELECT
    group_set,
    base."state" as "state__0",
    CASE WHEN group_set=0 THEN
      COUNT(1)
      END as "facility_count__0",
    CASE WHEN group_set=1 THEN
      base."fac_type"
      END as "fac_type__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "facility_count__1"
  FROM '../data/airports.parquet' as base
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  GROUP BY 1,2,4
)
SELECT
  "state__0" as "state",
  MAX(CASE WHEN group_set=0 THEN "facility_count__0" END) as "facility_count",
  COALESCE(LIST({
    "fac_type": "fac_type__1", 
    "facility_count": "facility_count__1"}  ORDER BY  "facility_count__1" desc NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_fac_type"
FROM __stage0
GROUP BY 1
ORDER BY 2 desc NULLS LAST

Pivots are really powerful

The model below is used in all the following examples.

source: flights is duckdb.table('../data/flights.parquet') extend {
  join_one: carriers is duckdb.table('../data/carriers.parquet') on carrier = carriers.code
  join_one: dest is  duckdb.table('../data/airports.parquet') on destination = dest.code
  join_one: orig is  duckdb.table('../data/airports.parquet') on destination = orig.code

  measure: 
    flight_count is count()
    total_distance is distance.sum()
}

The Classic Pivot

A classic data pivot is data is dimensionalized by two attributes the data can be rendered with one dimension along the x-axis and one dimension on the y-axis with the aggregate computations making up the center of the table. The cross section of the data allows for easy comparison. In Malloy, pivots columns are nested queries.

Carriers by FAA Region

run: flights -> {
  group_by: carriers.nickname
  # pivot
  nest: by_faa_region is  {
    group_by: orig.faa_region
    aggregate: flight_count 
  }
}

QUERY RESULTS

[
  {
    "nickname": "ATA",
    "by_faa_region": [
      {
        "faa_region": "AGL",
        "flight_count": 1672
      },
      {
        "faa_region": "AEA",
        "flight_count": 480
      },
      {
        "faa_region": "ASO",
        "flight_count": 358
      },
      {
        "faa_region": "AWP",
        "flight_count": 177
      },
      {
        "faa_region": "ANM",
        "flight_count": 149
      },
      {
        "faa_region": "ASW",
        "flight_count": 132
      },
      {
        "faa_region": "ANE",
        "flight_count": 65
      }
    ]
  },
  {
    "nickname": "Alaska",
    "by_faa_region": [
      {
        "faa_region": "ANM",
        "flight_count": 4345
      },
      {
        "faa_region": "AWP",
        "flight_count": 3604
      },
      {
        "faa_region": "AAL",
        "flight_count": 503
      },
      {
        "faa_region": "ASW",
        "flight_count": 1
      }
    ]
  },
  {
    "nickname": "America West",
    "by_faa_region": [
      {
        "faa_region": "AWP",
        "flight_count": 7224
      },
      {
        "faa_region": "ASW",
        "flight_count": 912
      },
      {
        "faa_region": "ANM",
        "flight_count": 868
      },
      {
        "faa_region": "ACE",
        "flight_count": 450
      },
      {
        "faa_region": "AGL",
        "flight_count": 289
      },
      {
        "faa_region": "ASO",
        "flight_count": 7
      }
    ]
  },
  {
    "nickname": "American",
    "by_faa_region": [
      {
        "faa_region": "ASW",
        "flight_count": 11319
      },
      {
        "faa_region": "AWP",
        "flight_count": 6026
      },
      {
        "faa_region": "AGL",
        "flight_count": 5946
      },
      {
        "faa_region": "AEA",
        "flight_count": 4200
      },
      {
        "faa_region": "ASO",
        "flight_count": 3530
      },
      {
        "faa_region": "ANM",
        "flight_count": 1469
      },
      {
        "faa_region": "ACE",
        "flight_count": 1130
      },
      {
        "faa_region": "ANE",
        "flight_count": 956
      },
      {
        "faa_region": "AAL",
        "flight_count": 1
      }
    ]
  },
  {
    "nickname": "American Eagle",
    "by_faa_region": [
      {
        "faa_region": "ASW",
        "flight_count": 11824
      },
      {
        "faa_region": "AGL",
        "flight_count": 1474
      },
      {
        "faa_region": "AEA",
        "flight_count": 1132
      },
      {
        "faa_region": "ANE",
        "flight_count": 665
      },
      {
        "faa_region": "ACE",
        "flight_count": 470
      },
      {
        "faa_region": "ASO",
        "flight_count": 244
      },
      {
        "faa_region": "AWP",
        "flight_count": 43
      },
      {
        "faa_region": "ANM",
        "flight_count": 17
      }
    ]
  }
]

WITH __stage0 AS (
  SELECT
    group_set,
    carriers_0."nickname" as "nickname__0",
    CASE WHEN group_set=1 THEN
      orig_0."faa_region"
      END as "faa_region__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "flight_count__1"
  FROM '../data/flights.parquet' as base
   LEFT JOIN '../data/carriers.parquet' AS carriers_0
    ON base."carrier"=carriers_0."code"
   LEFT JOIN '../data/airports.parquet' AS orig_0
    ON base."destination"=orig_0."code"
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  GROUP BY 1,2,3
)
SELECT
  "nickname__0" as "nickname",
  COALESCE(LIST({
    "faa_region": "faa_region__1", 
    "flight_count": "flight_count__1"}  ORDER BY  "flight_count__1" desc NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_faa_region"
FROM __stage0
GROUP BY 1
ORDER BY 1 asc NULLS LAST

The Pivot Filtered

You can control which dimension are shown in the pivot with a filter (and their order)

run: flights -> {
  group_by: carriers.nickname
  # pivot
  nest: by_state is  {
    where: orig.state = 'CA' | 'NY' | 'WA'  // only show these states
    group_by: orig.state
    aggregate: flight_count 
    order_by: state   // sort order of the pivoted columns
  }
}

QUERY RESULTS

[
  {
    "nickname": "ATA",
    "by_state": [
      {
        "state": "CA",
        "flight_count": 78
      },
      {
        "state": "NY",
        "flight_count": 208
      },
      {
        "state": "WA",
        "flight_count": 27
      }
    ]
  },
  {
    "nickname": "Alaska",
    "by_state": [
      {
        "state": "CA",
        "flight_count": 2633
      },
      {
        "state": "WA",
        "flight_count": 3257
      }
    ]
  },
  {
    "nickname": "America West",
    "by_state": [
      {
        "state": "CA",
        "flight_count": 2038
      },
      {
        "state": "WA",
        "flight_count": 216
      }
    ]
  },
  {
    "nickname": "American",
    "by_state": [
      {
        "state": "CA",
        "flight_count": 4681
      },
      {
        "state": "NY",
        "flight_count": 2168
      },
      {
        "state": "WA",
        "flight_count": 495
      }
    ]
  },
  {
    "nickname": "American Eagle",
    "by_state": [
      {
        "state": "CA",
        "flight_count": 43
      },
      {
        "state": "NY",
        "flight_count": 961
      }
    ]
  }
]

WITH __stage0 AS (
  SELECT
    group_set,
    carriers_0."nickname" as "nickname__0",
    CASE WHEN group_set=1 THEN
      orig_0."state"
      END as "state__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "flight_count__1"
  FROM '../data/flights.parquet' as base
   LEFT JOIN '../data/airports.parquet' AS orig_0
    ON base."destination"=orig_0."code"
   LEFT JOIN '../data/carriers.parquet' AS carriers_0
    ON base."carrier"=carriers_0."code"
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  WHERE (group_set NOT IN (1) OR (group_set IN (1) AND orig_0."state" IN ('CA','NY','WA')))
  GROUP BY 1,2,3
)
SELECT
  "nickname__0" as "nickname",
  COALESCE(LIST({
    "state": "state__1", 
    "flight_count": "flight_count__1"}  ORDER BY  "state__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_state"
FROM __stage0
GROUP BY 1
ORDER BY 1 asc NULLS LAST

The Pivot with multiple aggreagtes

Pivots can have multiple aggregates. In this case we show flight_count and total_distance for each of the states.

run: flights -> {
  group_by: carriers.nickname
  # pivot
  nest: by_state is  {
    where: orig.state = 'CA' | 'NY' | 'WA'  // only show these states
    group_by: orig.state
    aggregate: 
      flight_count 
      total_distance
    order_by: state   // sort order of the pivoted columns
  }
}

QUERY RESULTS

[
  {
    "nickname": "ATA",
    "by_state": [
      {
        "state": "CA",
        "flight_count": 78,
        "total_distance": 144910
      },
      {
        "state": "NY",
        "flight_count": 208,
        "total_distance": 149936
      },
      {
        "state": "WA",
        "flight_count": 27,
        "total_distance": 46791
      }
    ]
  },
  {
    "nickname": "Alaska",
    "by_state": [
      {
        "state": "CA",
        "flight_count": 2633,
        "total_distance": 2111255
      },
      {
        "state": "WA",
        "flight_count": 3257,
        "total_distance": 2686505
      }
    ]
  },
  {
    "nickname": "America West",
    "by_state": [
      {
        "state": "CA",
        "flight_count": 2038,
        "total_distance": 847343
      },
      {
        "state": "WA",
        "flight_count": 216,
        "total_distance": 216113
      }
    ]
  },
  {
    "nickname": "American",
    "by_state": [
      {
        "state": "CA",
        "flight_count": 4681,
        "total_distance": 7266808
      },
      {
        "state": "NY",
        "flight_count": 2168,
        "total_distance": 3183109
      },
      {
        "state": "WA",
        "flight_count": 495,
        "total_distance": 827090
      }
    ]
  },
  {
    "nickname": "American Eagle",
    "by_state": [
      {
        "state": "CA",
        "flight_count": 43,
        "total_distance": 57945
      },
      {
        "state": "NY",
        "flight_count": 961,
        "total_distance": 324058
      }
    ]
  }
]

WITH __stage0 AS (
  SELECT
    group_set,
    carriers_0."nickname" as "nickname__0",
    CASE WHEN group_set=1 THEN
      orig_0."state"
      END as "state__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "flight_count__1",
    CASE WHEN group_set=1 THEN
      COALESCE(SUM(base."distance"),0)
      END as "total_distance__1"
  FROM '../data/flights.parquet' as base
   LEFT JOIN '../data/airports.parquet' AS orig_0
    ON base."destination"=orig_0."code"
   LEFT JOIN '../data/carriers.parquet' AS carriers_0
    ON base."carrier"=carriers_0."code"
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  WHERE (group_set NOT IN (1) OR (group_set IN (1) AND orig_0."state" IN ('CA','NY','WA')))
  GROUP BY 1,2,3
)
SELECT
  "nickname__0" as "nickname",
  COALESCE(LIST({
    "state": "state__1", 
    "flight_count": "flight_count__1", 
    "total_distance": "total_distance__1"}  ORDER BY  "state__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_state"
FROM __stage0
GROUP BY 1
ORDER BY 1 asc NULLS LAST

Aggregates outside the pivot (and row ordering)

Malloy allows you to intermix unpivoted data along with pivoted data through nesting. Since pivots are nests, any aggregate outside the nest is just shown normally.

run: flights -> {
  group_by: carriers.nickname
  aggregate: 
    total_flights is flight_count   // outside the pivot
    total_distance
  # pivot
  nest: by_state is  {
    where: orig.state = 'CA' | 'NY' | 'WA'  // only show these states
    group_by: orig.state
    aggregate: flight_count 
    order_by: state   // sort order of the pivoted columns
  }
}

QUERY RESULTS

[
  {
    "nickname": "Southwest",
    "total_flights": 88751,
    "total_distance": 54619152,
    "by_state": [
      {
        "state": "CA",
        "flight_count": 19764
      },
      {
        "state": "NY",
        "flight_count": 1898
      },
      {
        "state": "WA",
        "flight_count": 1902
      }
    ]
  },
  {
    "nickname": "USAir",
    "total_flights": 37683,
    "total_distance": 23721642,
    "by_state": [
      {
        "state": "CA",
        "flight_count": 541
      },
      {
        "state": "NY",
        "flight_count": 2791
      },
      {
        "state": "WA",
        "flight_count": 185
      }
    ]
  },
  {
    "nickname": "American",
    "total_flights": 34577,
    "total_distance": 37684885,
    "by_state": [
      {
        "state": "CA",
        "flight_count": 4681
      },
      {
        "state": "NY",
        "flight_count": 2168
      },
      {
        "state": "WA",
        "flight_count": 495
      }
    ]
  },
  {
    "nickname": "Northwest",
    "total_flights": 33580,
    "total_distance": 33376503,
    "by_state": [
      {
        "state": "CA",
        "flight_count": 2203
      },
      {
        "state": "NY",
        "flight_count": 775
      },
      {
        "state": "WA",
        "flight_count": 850
      }
    ]
  },
  {
    "nickname": "United",
    "total_flights": 32757,
    "total_distance": 38882934,
    "by_state": [
      {
        "state": "CA",
        "flight_count": 6873
      },
      {
        "state": "NY",
        "flight_count": 576
      },
      {
        "state": "WA",
        "flight_count": 742
      }
    ]
  }
]

WITH __stage0 AS (
  SELECT
    group_set,
    carriers_0."nickname" as "nickname__0",
    (CASE WHEN group_set=0 THEN
      COUNT(1)
      END) as "total_flights__0",
    CASE WHEN group_set=0 THEN
      COALESCE(SUM(base."distance"),0)
      END as "total_distance__0",
    CASE WHEN group_set=1 THEN
      orig_0."state"
      END as "state__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "flight_count__1"
  FROM '../data/flights.parquet' as base
   LEFT JOIN '../data/airports.parquet' AS orig_0
    ON base."destination"=orig_0."code"
   LEFT JOIN '../data/carriers.parquet' AS carriers_0
    ON base."carrier"=carriers_0."code"
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  WHERE (group_set NOT IN (1) OR (group_set IN (1) AND orig_0."state" IN ('CA','NY','WA')))
  GROUP BY 1,2,5
)
SELECT
  "nickname__0" as "nickname",
  MAX(CASE WHEN group_set=0 THEN "total_flights__0" END) as "total_flights",
  MAX(CASE WHEN group_set=0 THEN "total_distance__0" END) as "total_distance",
  COALESCE(LIST({
    "state": "state__1", 
    "flight_count": "flight_count__1"}  ORDER BY  "state__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_state"
FROM __stage0
GROUP BY 1
ORDER BY 2 desc NULLS LAST

Multiple pivots in the same table

Malloy allows you to intermix unpivoted data along with pivoted data through nesting. Since pivots are nests, any aggregate outside the nest is just shown normally.

run: flights -> {
  group_by: carriers.nickname
  aggregate: 
    total_flights is flight_count   // outside the pivot
  # pivot
  nest: by_state is  {
    where: orig.state = 'CA' | 'NY' | 'WA'  // only show these states
    group_by: orig.state
    aggregate: flight_count 
    order_by: state   // sort order of the pivoted columns
  }
  # pivot
  nest: by_year is  {
    where: dep_time.year > @2003
    group_by: dep_year is dep_time.year
    aggregate: flight_count
  }
}

QUERY RESULTS

[
  {
    "nickname": "Southwest",
    "total_flights": 88751,
    "by_state": [
      {
        "state": "CA",
        "flight_count": 19764
      },
      {
        "state": "NY",
        "flight_count": 1898
      },
      {
        "state": "WA",
        "flight_count": 1902
      }
    ],
    "by_year": [
      {
        "dep_year": "2005-01-01T00:00:00.000Z",
        "flight_count": 17549
      },
      {
        "dep_year": "2004-01-01T00:00:00.000Z",
        "flight_count": 14640
      }
    ]
  },
  {
    "nickname": "USAir",
    "total_flights": 37683,
    "by_state": [
      {
        "state": "CA",
        "flight_count": 541
      },
      {
        "state": "NY",
        "flight_count": 2791
      },
      {
        "state": "WA",
        "flight_count": 185
      }
    ],
    "by_year": [
      {
        "dep_year": "2005-01-01T00:00:00.000Z",
        "flight_count": 7446
      },
      {
        "dep_year": "2004-01-01T00:00:00.000Z",
        "flight_count": 5388
      }
    ]
  },
  {
    "nickname": "American",
    "total_flights": 34577,
    "by_state": [
      {
        "state": "CA",
        "flight_count": 4681
      },
      {
        "state": "NY",
        "flight_count": 2168
      },
      {
        "state": "WA",
        "flight_count": 495
      }
    ],
    "by_year": [
      {
        "dep_year": "2005-01-01T00:00:00.000Z",
        "flight_count": 5543
      },
      {
        "dep_year": "2004-01-01T00:00:00.000Z",
        "flight_count": 5604
      }
    ]
  },
  {
    "nickname": "Northwest",
    "total_flights": 33580,
    "by_state": [
      {
        "state": "CA",
        "flight_count": 2203
      },
      {
        "state": "NY",
        "flight_count": 775
      },
      {
        "state": "WA",
        "flight_count": 850
      }
    ],
    "by_year": [
      {
        "dep_year": "2005-01-01T00:00:00.000Z",
        "flight_count": 5869
      },
      {
        "dep_year": "2004-01-01T00:00:00.000Z",
        "flight_count": 6306
      }
    ]
  },
  {
    "nickname": "United",
    "total_flights": 32757,
    "by_state": [
      {
        "state": "CA",
        "flight_count": 6873
      },
      {
        "state": "NY",
        "flight_count": 576
      },
      {
        "state": "WA",
        "flight_count": 742
      }
    ],
    "by_year": [
      {
        "dep_year": "2005-01-01T00:00:00.000Z",
        "flight_count": 6876
      },
      {
        "dep_year": "2004-01-01T00:00:00.000Z",
        "flight_count": 6817
      }
    ]
  }
]

WITH __stage0 AS (
  SELECT
    group_set,
    carriers_0."nickname" as "nickname__0",
    (CASE WHEN group_set=0 THEN
      COUNT(1)
      END) as "total_flights__0",
    CASE WHEN group_set=1 THEN
      orig_0."state"
      END as "state__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "flight_count__1",
    CASE WHEN group_set=2 THEN
      DATE_TRUNC('year', base."dep_time")
      END as "dep_year__2",
    CASE WHEN group_set=2 THEN
      COUNT(1)
      END as "flight_count__2"
  FROM '../data/flights.parquet' as base
   LEFT JOIN '../data/airports.parquet' AS orig_0
    ON base."destination"=orig_0."code"
   LEFT JOIN '../data/carriers.parquet' AS carriers_0
    ON base."carrier"=carriers_0."code"
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,2,1)) as group_set  ) as group_set
  WHERE ((group_set NOT IN (1) OR (group_set IN (1) AND orig_0."state" IN ('CA','NY','WA'))))
  AND ((group_set NOT IN (2) OR (group_set IN (2) AND DATE_TRUNC('year', base."dep_time")>=TIMESTAMP '2004-01-01 00:00:00')))
  GROUP BY 1,2,4,6
)
SELECT
  "nickname__0" as "nickname",
  MAX(CASE WHEN group_set=0 THEN "total_flights__0" END) as "total_flights",
  COALESCE(LIST({
    "state": "state__1", 
    "flight_count": "flight_count__1"}  ORDER BY  "state__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_state",
  COALESCE(LIST({
    "dep_year": "dep_year__2", 
    "flight_count": "flight_count__2"}  ORDER BY  "dep_year__2" desc NULLS LAST) FILTER (WHERE group_set=2),[]) as "by_year"
FROM __stage0
GROUP BY 1
ORDER BY 2 desc NULLS LAST

Ordering by a column in the pivot

Malloy sorts by the first aggregate column in finds, generally. The # hidden tag allows you to not show a column in a result. By adding a measure filtered by the column we are interested in, we can sort the entire table on a column in the pivot.

Rows sorted by 'CA' flights

run: flights -> {
  group_by: carriers.nickname
  # hidden
  aggregate: ca_count is flight_count { where: orig.state = 'CA' }
  # pivot
  nest: by_state is  {
    where: orig.state = 'CA' | 'NY' | 'WA'  // only show these states
    group_by: orig.state
    aggregate: flight_count 
    order_by: state   // sort order of the pivoted columns
  }
}

QUERY RESULTS

[
  {
    "nickname": "Southwest",
    "ca_count": 19764,
    "by_state": [
      {
        "state": "CA",
        "flight_count": 19764
      },
      {
        "state": "NY",
        "flight_count": 1898
      },
      {
        "state": "WA",
        "flight_count": 1902
      }
    ]
  },
  {
    "nickname": "United",
    "ca_count": 6873,
    "by_state": [
      {
        "state": "CA",
        "flight_count": 6873
      },
      {
        "state": "NY",
        "flight_count": 576
      },
      {
        "state": "WA",
        "flight_count": 742
      }
    ]
  },
  {
    "nickname": "American",
    "ca_count": 4681,
    "by_state": [
      {
        "state": "CA",
        "flight_count": 4681
      },
      {
        "state": "NY",
        "flight_count": 2168
      },
      {
        "state": "WA",
        "flight_count": 495
      }
    ]
  },
  {
    "nickname": "Alaska",
    "ca_count": 2633,
    "by_state": [
      {
        "state": "CA",
        "flight_count": 2633
      },
      {
        "state": "WA",
        "flight_count": 3257
      }
    ]
  },
  {
    "nickname": "Northwest",
    "ca_count": 2203,
    "by_state": [
      {
        "state": "CA",
        "flight_count": 2203
      },
      {
        "state": "NY",
        "flight_count": 775
      },
      {
        "state": "WA",
        "flight_count": 850
      }
    ]
  }
]

WITH __stage0 AS (
  SELECT
    group_set,
    carriers_0."nickname" as "nickname__0",
    (CASE WHEN group_set=0 THEN
      COUNT(CASE WHEN orig_0."state"='CA' THEN 1 END)
      END) as "ca_count__0",
    CASE WHEN group_set=1 THEN
      orig_0."state"
      END as "state__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "flight_count__1"
  FROM '../data/flights.parquet' as base
   LEFT JOIN '../data/airports.parquet' AS orig_0
    ON base."destination"=orig_0."code"
   LEFT JOIN '../data/carriers.parquet' AS carriers_0
    ON base."carrier"=carriers_0."code"
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  WHERE (group_set NOT IN (1) OR (group_set IN (1) AND orig_0."state" IN ('CA','NY','WA')))
  GROUP BY 1,2,4
)
SELECT
  "nickname__0" as "nickname",
  MAX(CASE WHEN group_set=0 THEN "ca_count__0" END) as "ca_count",
  COALESCE(LIST({
    "state": "state__1", 
    "flight_count": "flight_count__1"}  ORDER BY  "state__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_state"
FROM __stage0
GROUP BY 1
ORDER BY 2 desc NULLS LAST

Multistage queries in a pivot

VSCode

Malloy Renderer uses the metadata from the query to decide which columns to pivot (dimensions are pivoted, aggreates are not). In multistage queries this information is incorrect. You can manually specify this information with a 'dimensions' parameter on the pivot tag.

run: flights -> {
  group_by: carriers.nickname
  aggregate: total_flights is flight_count
  # pivot { dimensions=["dep_year"] }
  nest: by_year is  {
    group_by: dep_year is dep_time.year
    aggregate: flight_count
    calculate: growth is (flight_count - lag(flight_count, 1)) / flight_count
    order_by: dep_year
  } -> {
    where: dep_year > @2003
    select:
      dep_year
      flight_count
      # percent
      growth
  }
}

QUERY RESULTS

[
  {
    "nickname": "Southwest",
    "total_flights": 88751,
    "by_year": [
      {
        "dep_year": "2004-01-01T00:00:00.000Z",
        "flight_count": 14640,
        "growth": 0.023224043715846996
      },
      {
        "dep_year": "2005-01-01T00:00:00.000Z",
        "flight_count": 17549,
        "growth": 0.16576443102171065
      }
    ]
  },
  {
    "nickname": "USAir",
    "total_flights": 37683,
    "by_year": [
      {
        "dep_year": "2004-01-01T00:00:00.000Z",
        "flight_count": 5388,
        "growth": 0.19803266518188567
      },
      {
        "dep_year": "2005-01-01T00:00:00.000Z",
        "flight_count": 7446,
        "growth": 0.2763900080580177
      }
    ]
  },
  {
    "nickname": "American",
    "total_flights": 34577,
    "by_year": [
      {
        "dep_year": "2004-01-01T00:00:00.000Z",
        "flight_count": 5604,
        "growth": 0.006780870806566738
      },
      {
        "dep_year": "2005-01-01T00:00:00.000Z",
        "flight_count": 5543,
        "growth": -0.011004871008479163
      }
    ]
  },
  {
    "nickname": "Northwest",
    "total_flights": 33580,
    "by_year": [
      {
        "dep_year": "2004-01-01T00:00:00.000Z",
        "flight_count": 6306,
        "growth": 0.1774500475737393
      },
      {
        "dep_year": "2005-01-01T00:00:00.000Z",
        "flight_count": 5869,
        "growth": -0.07445902197989436
      }
    ]
  },
  {
    "nickname": "United",
    "total_flights": 32757,
    "by_year": [
      {
        "dep_year": "2004-01-01T00:00:00.000Z",
        "flight_count": 6817,
        "growth": 0.10987237787883233
      },
      {
        "dep_year": "2005-01-01T00:00:00.000Z",
        "flight_count": 6876,
        "growth": 0.008580570098894706
      }
    ]
  }
]

WITH __stage0 AS (
  SELECT
    group_set,
    carriers_0."nickname" as "nickname__0",
    (CASE WHEN group_set=0 THEN
      COUNT(1)
      END) as "total_flights__0",
    CASE WHEN group_set=1 THEN
      DATE_TRUNC('year', base."dep_time")
      END as "dep_year__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "flight_count__1",
    (((CASE WHEN group_set=1 THEN
      COUNT(1)
      END)-(CASE WHEN group_set=1 THEN LAG((CASE WHEN group_set=1 THEN
      COUNT(1)
      END),1) OVER(PARTITION BY group_set, carriers_0."nickname"  ORDER BY  CASE WHEN group_set=1 THEN
      DATE_TRUNC('year', base."dep_time")
      END ASC NULLS LAST ) END)))*1.0/(CASE WHEN group_set=1 THEN
      COUNT(1)
      END) as "growth__1"
  FROM '../data/flights.parquet' as base
   LEFT JOIN '../data/carriers.parquet' AS carriers_0
    ON base."carrier"=carriers_0."code"
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  GROUP BY 1,2,4
)
SELECT
  "nickname__0" as "nickname",
  MAX(CASE WHEN group_set=0 THEN "total_flights__0" END) as "total_flights",
  (WITH __stage0 AS (
    SELECT 
       base."dep_year" as "dep_year",
       base."flight_count" as "flight_count",
       base."growth" as "growth"
    FROM (SELECT UNNEST(COALESCE(LIST({
      "dep_year": "dep_year__1", 
      "flight_count": "flight_count__1", 
      "growth": "growth__1"}  ORDER BY  "dep_year__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[])) as base) as base
    WHERE base."dep_year">=TIMESTAMP '2004-01-01 00:00:00'
  )
  SELECT LIST(STRUCT_PACK("dep_year","flight_count","growth")) FROM __stage0
  ) as "by_year"
FROM __stage0
GROUP BY 1
ORDER BY 2 desc NULLS LAST