Malloy Documentation
search

Often you want to limit the number of group-by values in a table, and bucket everything else into an 'other' category.

In the top_states_by_eleveation query below, we have a query with two stages. The first stage calculates the top states and nests the data to be aggregated. The second pipeline stage produces the actual aggregation.

document
source: airports is duckdb.table('../data/airports.parquet') extend {
  measure: 
    airport_count is count()
    avg_elevation is elevation.avg()

  view: top_states_by_elevation is {
    group_by: state
    aggregate: avg_elevation
    calculate: is_other is row_number() > 5
    nest: data is  {  
      group_by: code, elevation
    }
  } -> {
    group_by: state is 
      pick state when not is_other
      else 'OTHER'
    aggregate: 
      avg_elevation is data.elevation.avg()
      airport_count is data.count()
      # hidden
      sort is data.count() {where: not is_other }
      order_by: sort desc
  }
}

Basic Query

document
run: airports -> top_states_by_elevation
QUERY RESULTS
stateavg_​elevationairport_​count
CO6,255.864425
NM5,419.635181
UT5,066140
NV4,029.977128
WY5,619.365115
OTHER910.34618,781
[
  {
    "state": "CO",
    "avg_elevation": 6255.863529411765,
    "airport_count": 425,
    "sort": 425
  },
  {
    "state": "NM",
    "avg_elevation": 5419.635359116022,
    "airport_count": 181,
    "sort": 181
  },
  {
    "state": "UT",
    "avg_elevation": 5066,
    "airport_count": 140,
    "sort": 140
  },
  {
    "state": "NV",
    "avg_elevation": 4029.9765625,
    "airport_count": 128,
    "sort": 128
  },
  {
    "state": "WY",
    "avg_elevation": 5619.365217391304,
    "airport_count": 115,
    "sort": 115
  },
  {
    "state": "OTHER",
    "avg_elevation": 910.3459902148479,
    "airport_count": 18781,
    "sort": 0
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    airports."state" as "state__0",
    CASE WHEN group_set=0 THEN
      AVG(airports."elevation")
      END as "avg_elevation__0",
    ROW_NUMBER() OVER(PARTITION BY group_set  ORDER BY  CASE WHEN group_set=0 THEN
      AVG(airports."elevation")
      END desc NULLS LAST )>5 as "is_other__0",
    CASE WHEN group_set=1 THEN
      airports."code"
      END as "code__1",
    CASE WHEN group_set=1 THEN
      airports."elevation"
      END as "elevation__1"
  FROM '../data/airports.parquet' as airports
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  GROUP BY 1,2,5,6
)
, __stage1 AS (
  SELECT
    "state__0" as "state",
    MAX(CASE WHEN group_set=0 THEN "avg_elevation__0" END) as "avg_elevation",
    MAX(CASE WHEN group_set=0 THEN "is_other__0" END) as "is_other",
    COALESCE(LIST({
      "code": "code__1", 
      "elevation": "elevation__1"}  ORDER BY  "code__1" asc NULLS LAST) FILTER (WHERE group_set=1),[]) as "data"
  FROM __stage0
  GROUP BY 1
)
SELECT 
   CASE WHEN COALESCE(NOT(base."is_other"),FALSE) THEN base."state" ELSE 'OTHER' END as "state",
   AVG(data_0."elevation") as "avg_elevation",
   COUNT(DISTINCT base."state" || 'x' || data_0_outer.__row_id) as "airport_count",
   COUNT(DISTINCT CASE WHEN COALESCE(NOT(base."is_other"),FALSE) THEN base."state" || 'x' || data_0_outer.__row_id END) as "sort"
FROM __stage1 as base
LEFT JOIN LATERAL (SELECT UNNEST(GENERATE_SERIES(1, length(base."data"),1)) as __row_id, UNNEST(base."data"), 1 as ignoreme) as data_0_outer(__row_id, data_0,ignoreme) ON  data_0_outer.ignoreme=1
GROUP BY 1
ORDER BY 4 desc NULLS LAST

Nested Query

document
run: airports -> {
  group_by: `Facility Type` is fac_type
  aggregate: 
    airport_count
    avg_elevation
  nest: top_states_by_elevation
}
QUERY RESULTS
Facility Typeairport_​countavg_​elevationtop_​states_​by_​elevation
AIRPORT13,9251,237.044
stateavg_​elevationairport_​count
OTHER1,168.62611,128
TX1,039.9521,389
OH931.236537
OK1,097.084345
ID3,963196
KY737.641142
UT5,257.18697
WY5,570.03391
HELIPORT5,135950.513
stateavg_​elevationairport_​count
OTHER715.6114,916
CO6,816.57165
MT3,756.3129
NM5,170.7625
SEAPLANE BASE473488.822
stateavg_​elevationairport_​count
MT3,1942
NM4,2011
NE1,9461
SD1,6001
OTHER448.499463
ID1,884.65
ULTRALIGHT125806.144
stateavg_​elevationairport_​count
OTHER874.32580
LA85.11118
IN735.88217
AZ2,025.7147
KY866.6673
STOLPORT861,375.047
stateavg_​elevationairport_​count
AZ4,7721
OTHER1,155.43562
FL8713
CO6,211.6676
NC8614
GLIDERPORT371,611.405
stateavg_​elevationairport_​count
OTHER1,522.51729
AZ3,5392
KS2,088.52
NY1,484.52
IL622.52
BALLOONPORT121,047.25
stateavg_​elevationairport_​count
NJ2652
NC8001
MA4651
RI2551
OTHER1,502.4297
[
  {
    "Facility Type": "AIRPORT",
    "airport_count": 13925,
    "avg_elevation": 1237.0441651705567,
    "top_states_by_elevation": [
      {
        "state": "OTHER",
        "avg_elevation": 1168.6255391804457,
        "airport_count": 11128,
        "sort": 0
      },
      {
        "state": "TX",
        "avg_elevation": 1039.9524838012958,
        "airport_count": 1389,
        "sort": 1389
      },
      {
        "state": "OH",
        "avg_elevation": 931.2364990689013,
        "airport_count": 537,
        "sort": 537
      },
      {
        "state": "OK",
        "avg_elevation": 1097.0840579710145,
        "airport_count": 345,
        "sort": 345
      },
      {
        "state": "ID",
        "avg_elevation": 3963,
        "airport_count": 196,
        "sort": 196
      },
      {
        "state": "KY",
        "avg_elevation": 737.6408450704225,
        "airport_count": 142,
        "sort": 142
      },
      {
        "state": "UT",
        "avg_elevation": 5257.18556701031,
        "airport_count": 97,
        "sort": 97
      },
      {
        "state": "WY",
        "avg_elevation": 5570.0329670329675,
        "airport_count": 91,
        "sort": 91
      }
    ]
  },
  {
    "Facility Type": "HELIPORT",
    "airport_count": 5135,
    "avg_elevation": 950.5125608568646,
    "top_states_by_elevation": [
      {
        "state": "OTHER",
        "avg_elevation": 715.6114727420667,
        "airport_count": 4916,
        "sort": 0
      },
      {
        "state": "CO",
        "avg_elevation": 6816.569696969697,
        "airport_count": 165,
        "sort": 165
      },
      {
        "state": "MT",
        "avg_elevation": 3756.310344827586,
        "airport_count": 29,
        "sort": 29
      },
      {
        "state": "NM",
        "avg_elevation": 5170.76,
        "airport_count": 25,
        "sort": 25
      }
    ]
  },
  {
    "Facility Type": "SEAPLANE BASE",
    "airport_count": 473,
    "avg_elevation": 488.82241014799155,
    "top_states_by_elevation": [
      {
        "state": "MT",
        "avg_elevation": 3194,
        "airport_count": 2,
        "sort": 2
      },
      {
        "state": "NM",
        "avg_elevation": 4201,
        "airport_count": 1,
        "sort": 1
      },
      {
        "state": "NE",
        "avg_elevation": 1946,
        "airport_count": 1,
        "sort": 1
      },
      {
        "state": "SD",
        "avg_elevation": 1600,
        "airport_count": 1,
        "sort": 1
      },
      {
        "state": "OTHER",
        "avg_elevation": 448.49892008639307,
        "airport_count": 463,
        "sort": 0
      },
      {
        "state": "ID",
        "avg_elevation": 1884.6,
        "airport_count": 5,
        "sort": 5
      }
    ]
  },
  {
    "Facility Type": "ULTRALIGHT",
    "airport_count": 125,
    "avg_elevation": 806.144,
    "top_states_by_elevation": [
      {
        "state": "OTHER",
        "avg_elevation": 874.325,
        "airport_count": 80,
        "sort": 0
      },
      {
        "state": "LA",
        "avg_elevation": 85.11111111111111,
        "airport_count": 18,
        "sort": 18
      },
      {
        "state": "IN",
        "avg_elevation": 735.8823529411765,
        "airport_count": 17,
        "sort": 17
      },
      {
        "state": "AZ",
        "avg_elevation": 2025.7142857142858,
        "airport_count": 7,
        "sort": 7
      },
      {
        "state": "KY",
        "avg_elevation": 866.6666666666666,
        "airport_count": 3,
        "sort": 3
      }
    ]
  },
  {
    "Facility Type": "STOLPORT",
    "airport_count": 86,
    "avg_elevation": 1375.046511627907,
    "top_states_by_elevation": [
      {
        "state": "AZ",
        "avg_elevation": 4772,
        "airport_count": 1,
        "sort": 1
      },
      {
        "state": "OTHER",
        "avg_elevation": 1155.4354838709678,
        "airport_count": 62,
        "sort": 0
      },
      {
        "state": "FL",
        "avg_elevation": 87,
        "airport_count": 13,
        "sort": 13
      },
      {
        "state": "CO",
        "avg_elevation": 6211.666666666667,
        "airport_count": 6,
        "sort": 6
      },
      {
        "state": "NC",
        "avg_elevation": 861,
        "airport_count": 4,
        "sort": 4
      }
    ]
  },
  {
    "Facility Type": "GLIDERPORT",
    "airport_count": 37,
    "avg_elevation": 1611.4054054054054,
    "top_states_by_elevation": [
      {
        "state": "OTHER",
        "avg_elevation": 1522.5172413793102,
        "airport_count": 29,
        "sort": 0
      },
      {
        "state": "AZ",
        "avg_elevation": 3539,
        "airport_count": 2,
        "sort": 2
      },
      {
        "state": "KS",
        "avg_elevation": 2088.5,
        "airport_count": 2,
        "sort": 2
      },
      {
        "state": "NY",
        "avg_elevation": 1484.5,
        "airport_count": 2,
        "sort": 2
      },
      {
        "state": "IL",
        "avg_elevation": 622.5,
        "airport_count": 2,
        "sort": 2
      }
    ]
  },
  {
    "Facility Type": "BALLOONPORT",
    "airport_count": 12,
    "avg_elevation": 1047.25,
    "top_states_by_elevation": [
      {
        "state": "NJ",
        "avg_elevation": 265,
        "airport_count": 2,
        "sort": 2
      },
      {
        "state": "NC",
        "avg_elevation": 800,
        "airport_count": 1,
        "sort": 1
      },
      {
        "state": "MA",
        "avg_elevation": 465,
        "airport_count": 1,
        "sort": 1
      },
      {
        "state": "RI",
        "avg_elevation": 255,
        "airport_count": 1,
        "sort": 1
      },
      {
        "state": "OTHER",
        "avg_elevation": 1502.4285714285713,
        "airport_count": 7,
        "sort": 0
      }
    ]
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    airports."fac_type" as "Facility Type__0",
    CASE WHEN group_set=0 THEN
      COUNT( 1)
      END as "airport_count__0",
    CASE WHEN group_set=0 THEN
      AVG(airports."elevation")
      END as "avg_elevation__0",
    CASE WHEN group_set IN (1,2) THEN
      airports."state"
      END as "state__1",
    CASE WHEN group_set=1 THEN
      AVG(airports."elevation")
      END as "avg_elevation__1",
    ROW_NUMBER() OVER(PARTITION BY group_set, airports."fac_type"  ORDER BY  CASE WHEN group_set=1 THEN
      AVG(airports."elevation")
      END desc NULLS LAST )>5 as "is_other__1",
    CASE WHEN group_set=2 THEN
      airports."code"
      END as "code__2",
    CASE WHEN group_set=2 THEN
      airports."elevation"
      END as "elevation__2"
  FROM '../data/airports.parquet' as airports
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,2,1)) as group_set  ) as group_set
  GROUP BY 1,2,5,8,9
)
, __stage1 AS (
  SELECT 
    CASE WHEN group_set=2 THEN 1  ELSE group_set END as group_set,
    "Facility Type__0" as "Facility Type__0",
    FIRST("airport_count__0") FILTER (WHERE "airport_count__0" IS NOT NULL) as "airport_count__0",
    FIRST("avg_elevation__0") FILTER (WHERE "avg_elevation__0" IS NOT NULL) as "avg_elevation__0",
    CASE WHEN group_set IN (1,2) THEN
      "state__1"
      END as "state__1",
    FIRST("avg_elevation__1") FILTER (WHERE "avg_elevation__1" IS NOT NULL) as "avg_elevation__1",
    FIRST("is_other__1") FILTER (WHERE "is_other__1" IS NOT NULL) as "is_other__1",
    COALESCE(LIST({
      "code": "code__2", 
      "elevation": "elevation__2"}  ORDER BY  "code__2" asc NULLS LAST) FILTER (WHERE group_set=2),[]) as "data__1"
  FROM __stage0
  GROUP BY 1,2,5
)
SELECT
  "Facility Type__0" as "Facility Type",
  MAX(CASE WHEN group_set=0 THEN "airport_count__0" END) as "airport_count",
  MAX(CASE WHEN group_set=0 THEN "avg_elevation__0" END) as "avg_elevation",
  (WITH __stage0 AS (
    SELECT 
       CASE WHEN COALESCE(NOT(base."is_other"),FALSE) THEN base."state" ELSE 'OTHER' END as "state",
       AVG(data_0."elevation") as "avg_elevation",
       COUNT(DISTINCT base."__distinct_key" || 'x' || data_0_outer.__row_id) as "airport_count",
       COUNT(DISTINCT CASE WHEN COALESCE(NOT(base."is_other"),FALSE) THEN base."__distinct_key" || 'x' || data_0_outer.__row_id END) as "sort"
    FROM (SELECT GEN_RANDOM_UUID() as __distinct_key, x.*  FROM (SELECT UNNEST(COALESCE(LIST({
      "state": "state__1", 
      "avg_elevation": "avg_elevation__1", 
      "is_other": "is_other__1", 
      "data": "data__1"}  ORDER BY  "avg_elevation__1" desc NULLS LAST) FILTER (WHERE group_set=1),[])) as base) as x) as base
    LEFT JOIN LATERAL (SELECT UNNEST(GENERATE_SERIES(1, length(base."data"),1)) as __row_id, UNNEST(base."data"), 1 as ignoreme) as data_0_outer(__row_id, data_0,ignoreme) ON  data_0_outer.ignoreme=1
    GROUP BY 1
    ORDER BY 4 desc NULLS LAST
  )
  SELECT LIST(STRUCT_PACK("state","avg_elevation","airport_count","sort")) FROM __stage0
  ) as "top_states_by_elevation"
FROM __stage1
GROUP BY 1
ORDER BY 2 desc NULLS LAST