Malloy Documentation
search

This guide covers advanced Malloy features for building sophisticated semantic models. It assumes you've completed Building a Semantic Model and are familiar with sources, dimensions, measures, views, and basic joins.


Setup

The examples below use these source definitions:

document
source: airports is duckdb.table('../data/airports.parquet') extend {
  measure: airport_count is count()
  measure: avg_elevation is elevation.avg()

  view: by_state is {
    group_by: state
    aggregate: airport_count
  }

  view: top_5_states is {
    group_by: state
    aggregate: airport_count
    limit: 5
  }

  view: by_facility_type is {
    group_by: fac_type
    aggregate: airport_count
  }
}

Nesting Queries

The nest: property embeds one query inside another, creating subtables within your results. This is powerful for understanding data hierarchically.

Basic Nesting

Group by state, then nest facility types within each state:

document
run: airports -> {
  group_by: state
  aggregate: airport_count
  limit: 5
  nest: by_facility_type is {
    group_by: fac_type
    aggregate: airport_count
  }
}
QUERY RESULTS
[
  {
    "state": "TX",
    "airport_count": 1845,
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 1389
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 435
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 8
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 8
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 5
      }
    ]
  },
  {
    "state": "CA",
    "airport_count": 984,
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 569
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 396
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 12
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 3
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 2
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 2
      }
    ]
  },
  {
    "state": "IL",
    "airport_count": 890,
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 625
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 245
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 8
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 6
      },
      {
        "fac_type": "BALLOONPORT",
        "airport_count": 2
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 2
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 2
      }
    ]
  },
  {
    "state": "FL",
    "airport_count": 856,
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 511
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 280
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 43
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 13
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 5
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 4
      }
    ]
  },
  {
    "state": "PA",
    "airport_count": 804,
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 468
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 307
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 13
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 10
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 3
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 3
      }
    ]
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    base."state" as "state__0",
    CASE WHEN group_set=0 THEN
      COUNT(1)
      END as "airport_count__0",
    CASE WHEN group_set=1 THEN
      base."fac_type"
      END as "fac_type__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "airport_count__1"
  FROM '../data/airports.parquet' as base
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  GROUP BY 1,2,4
)
SELECT
  "state__0" as "state",
  MAX(CASE WHEN group_set=0 THEN "airport_count__0" END) as "airport_count",
  COALESCE(LIST({
    "fac_type": "fac_type__1", 
    "airport_count": "airport_count__1"}  ORDER BY  "airport_count__1" desc NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_facility_type"
FROM __stage0
GROUP BY 1
ORDER BY 2 desc NULLS LAST
LIMIT 5

Multiple Nested Views

Queries can contain multiple nested views:

document
run: airports -> {
  group_by: faa_region
  aggregate: airport_count
  nest: top_5_states
  nest: by_facility_type
}
QUERY RESULTS
[
  {
    "faa_region": "AGL",
    "airport_count": 4437,
    "top_5_states": [
      {
        "state": "IL",
        "airport_count": 890
      },
      {
        "state": "OH",
        "airport_count": 749
      },
      {
        "state": "IN",
        "airport_count": 643
      },
      {
        "state": "WI",
        "airport_count": 543
      },
      {
        "state": "MN",
        "airport_count": 507
      }
    ],
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 3443
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 826
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 119
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 30
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 11
      },
      {
        "fac_type": "BALLOONPORT",
        "airport_count": 4
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 4
      }
    ]
  },
  {
    "faa_region": "ASW",
    "airport_count": 3268,
    "top_5_states": [
      {
        "state": "TX",
        "airport_count": 1845
      },
      {
        "state": "LA",
        "airport_count": 500
      },
      {
        "state": "OK",
        "airport_count": 443
      },
      {
        "state": "AR",
        "airport_count": 299
      },
      {
        "state": "NM",
        "airport_count": 181
      }
    ],
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 2341
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 861
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 32
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 19
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 9
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 6
      }
    ]
  },
  {
    "faa_region": "ASO",
    "airport_count": 2924,
    "top_5_states": [
      {
        "state": "FL",
        "airport_count": 856
      },
      {
        "state": "GA",
        "airport_count": 440
      },
      {
        "state": "NC",
        "airport_count": 400
      },
      {
        "state": "TN",
        "airport_count": 285
      },
      {
        "state": "AL",
        "airport_count": 260
      }
    ],
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 2038
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 770
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 57
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 33
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 17
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 8
      },
      {
        "fac_type": "BALLOONPORT",
        "airport_count": 1
      }
    ]
  },
  {
    "faa_region": "AEA",
    "airport_count": 2586,
    "top_5_states": [
      {
        "state": "PA",
        "airport_count": 804
      },
      {
        "state": "NY",
        "airport_count": 576
      },
      {
        "state": "VA",
        "airport_count": 421
      },
      {
        "state": "NJ",
        "airport_count": 378
      },
      {
        "state": "MD",
        "airport_count": 229
      }
    ],
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 1525
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 964
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 61
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 18
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 8
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 7
      },
      {
        "fac_type": "BALLOONPORT",
        "airport_count": 3
      }
    ]
  },
  {
    "faa_region": "ANM",
    "airport_count": 2102,
    "top_5_states": [
      {
        "state": "WA",
        "airport_count": 484
      },
      {
        "state": "OR",
        "airport_count": 441
      },
      {
        "state": "CO",
        "airport_count": 425
      },
      {
        "state": "MT",
        "airport_count": 259
      },
      {
        "state": "ID",
        "airport_count": 238
      }
    ],
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 1524
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 527
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 25
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 13
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 8
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 4
      },
      {
        "fac_type": "BALLOONPORT",
        "airport_count": 1
      }
    ]
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    base."faa_region" as "faa_region__0",
    CASE WHEN group_set=0 THEN
      COUNT(1)
      END as "airport_count__0",
    CASE WHEN group_set=1 THEN
      base."state"
      END as "state__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "airport_count__1",
    CASE WHEN group_set=2 THEN
      base."fac_type"
      END as "fac_type__2",
    CASE WHEN group_set=2 THEN
      COUNT(1)
      END as "airport_count__2"
  FROM '../data/airports.parquet' as base
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,2,1)) as group_set  ) as group_set
  GROUP BY 1,2,4,6
)
SELECT
  "faa_region__0" as "faa_region",
  MAX(CASE WHEN group_set=0 THEN "airport_count__0" END) as "airport_count",
  COALESCE(LIST({
    "state": "state__1", 
    "airport_count": "airport_count__1"}  ORDER BY  "airport_count__1" desc NULLS LAST) FILTER (WHERE group_set=1)[1:5],[]) as "top_5_states",
  COALESCE(LIST({
    "fac_type": "fac_type__2", 
    "airport_count": "airport_count__2"}  ORDER BY  "airport_count__2" desc NULLS LAST) FILTER (WHERE group_set=2),[]) as "by_facility_type"
FROM __stage0
GROUP BY 1
ORDER BY 2 desc NULLS LAST

Deep Nesting

Views can be nested to any depth:

document
run: airports -> {
  group_by: faa_region
  aggregate: airport_count
  nest: by_state_and_county is {
    group_by: state
    aggregate: airport_count
    nest: by_county is {
      group_by: county
      aggregate: airport_count
      limit: 4
    }
  }
  nest: by_facility_type
}
QUERY RESULTS
[
  {
    "faa_region": "AGL",
    "airport_count": 4437,
    "by_state_and_county": [
      {
        "state": "IL",
        "airport_count": 890,
        "by_county": [
          {
            "county": "COOK",
            "airport_count": 51
          },
          {
            "county": "LA SALLE",
            "airport_count": 39
          },
          {
            "county": "MC HENRY",
            "airport_count": 29
          },
          {
            "county": "DE KALB",
            "airport_count": 27
          }
        ]
      },
      {
        "state": "OH",
        "airport_count": 749,
        "by_county": [
          {
            "county": "CUYAHOGA",
            "airport_count": 27
          },
          {
            "county": "FRANKLIN",
            "airport_count": 27
          },
          {
            "county": "STARK",
            "airport_count": 23
          },
          {
            "county": "MONTGOMERY",
            "airport_count": 22
          }
        ]
      },
      {
        "state": "IN",
        "airport_count": 643,
        "by_county": [
          {
            "county": "MARION",
            "airport_count": 27
          },
          {
            "county": "ALLEN",
            "airport_count": 24
          },
          {
            "county": "HAMILTON",
            "airport_count": 20
          },
          {
            "county": "MARSHALL",
            "airport_count": 18
          }
        ]
      },
      {
        "state": "WI",
        "airport_count": 543,
        "by_county": [
          {
            "county": "DANE",
            "airport_count": 30
          },
          {
            "county": "WALWORTH",
            "airport_count": 22
          },
          {
            "county": "WINNEBAGO",
            "airport_count": 17
          },
          {
            "county": "ST CROIX",
            "airport_count": 16
          }
        ]
      },
      {
        "state": "MN",
        "airport_count": 507,
        "by_county": [
          {
            "county": "ST LOUIS",
            "airport_count": 28
          },
          {
            "county": "HENNEPIN",
            "airport_count": 23
          },
          {
            "county": "CROW WING",
            "airport_count": 17
          },
          {
            "county": "DAKOTA",
            "airport_count": 17
          }
        ]
      },
      {
        "state": "MI",
        "airport_count": 489,
        "by_county": [
          {
            "county": "OAKLAND",
            "airport_count": 25
          },
          {
            "county": "KENT",
            "airport_count": 24
          },
          {
            "county": "WAYNE",
            "airport_count": 17
          },
          {
            "county": "LIVINGSTON",
            "airport_count": 15
          }
        ]
      },
      {
        "state": "ND",
        "airport_count": 436,
        "by_county": [
          {
            "county": "CASS",
            "airport_count": 39
          },
          {
            "county": "RICHLAND",
            "airport_count": 20
          },
          {
            "county": "MC LEAN",
            "airport_count": 20
          },
          {
            "county": "WARD",
            "airport_count": 19
          }
        ]
      },
      {
        "state": "SD",
        "airport_count": 180,
        "by_county": [
          {
            "county": "PENNINGTON",
            "airport_count": 7
          },
          {
            "county": "MINNEHAHA",
            "airport_count": 7
          },
          {
            "county": "MEADE",
            "airport_count": 7
          },
          {
            "county": "PERKINS",
            "airport_count": 6
          }
        ]
      }
    ],
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 3443
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 826
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 119
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 30
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 11
      },
      {
        "fac_type": "BALLOONPORT",
        "airport_count": 4
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 4
      }
    ]
  },
  {
    "faa_region": "ASW",
    "airport_count": 3268,
    "by_state_and_county": [
      {
        "state": "TX",
        "airport_count": 1845,
        "by_county": [
          {
            "county": "HARRIS",
            "airport_count": 135
          },
          {
            "county": "TARRANT",
            "airport_count": 63
          },
          {
            "county": "DENTON",
            "airport_count": 53
          },
          {
            "county": "DALLAS",
            "airport_count": 42
          }
        ]
      },
      {
        "state": "LA",
        "airport_count": 500,
        "by_county": [
          {
            "county": "PLAQUEMINES",
            "airport_count": 31
          },
          {
            "county": "VERMILION",
            "airport_count": 29
          },
          {
            "county": "CALCASIEU",
            "airport_count": 23
          },
          {
            "county": "LAFOURCHE",
            "airport_count": 21
          }
        ]
      },
      {
        "state": "OK",
        "airport_count": 443,
        "by_county": [
          {
            "county": "OKLAHOMA",
            "airport_count": 31
          },
          {
            "county": "TULSA",
            "airport_count": 25
          },
          {
            "county": "ROGERS",
            "airport_count": 16
          },
          {
            "county": "DELAWARE",
            "airport_count": 13
          }
        ]
      },
      {
        "state": "AR",
        "airport_count": 299,
        "by_county": [
          {
            "county": "PULASKI",
            "airport_count": 20
          },
          {
            "county": "BENTON",
            "airport_count": 19
          },
          {
            "county": "LONOKE",
            "airport_count": 13
          },
          {
            "county": "MISSISSIPPI",
            "airport_count": 10
          }
        ]
      },
      {
        "state": "NM",
        "airport_count": 181,
        "by_county": [
          {
            "county": "CATRON",
            "airport_count": 13
          },
          {
            "county": "LINCOLN",
            "airport_count": 10
          },
          {
            "county": "LUNA",
            "airport_count": 9
          },
          {
            "county": "BERNALILLO",
            "airport_count": 9
          }
        ]
      }
    ],
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 2341
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 861
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 32
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 19
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 9
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 6
      }
    ]
  },
  {
    "faa_region": "ASO",
    "airport_count": 2924,
    "by_state_and_county": [
      {
        "state": "FL",
        "airport_count": 856,
        "by_county": [
          {
            "county": "PALM BEACH",
            "airport_count": 45
          },
          {
            "county": "DADE",
            "airport_count": 44
          },
          {
            "county": "POLK",
            "airport_count": 43
          },
          {
            "county": "MARION",
            "airport_count": 37
          }
        ]
      },
      {
        "state": "GA",
        "airport_count": 440,
        "by_county": [
          {
            "county": "FULTON",
            "airport_count": 22
          },
          {
            "county": "PIKE",
            "airport_count": 17
          },
          {
            "county": "CARROLL",
            "airport_count": 14
          },
          {
            "county": "FAYETTE",
            "airport_count": 12
          }
        ]
      },
      {
        "state": "NC",
        "airport_count": 400,
        "by_county": [
          {
            "county": "WAKE",
            "airport_count": 15
          },
          {
            "county": "ROWAN",
            "airport_count": 14
          },
          {
            "county": "UNION",
            "airport_count": 14
          },
          {
            "county": "MECKLENBURG",
            "airport_count": 14
          }
        ]
      },
      {
        "state": "TN",
        "airport_count": 285,
        "by_county": [
          {
            "county": "SHELBY",
            "airport_count": 24
          },
          {
            "county": "DAVIDSON",
            "airport_count": 17
          },
          {
            "county": "KNOX",
            "airport_count": 14
          },
          {
            "county": "RUTHERFORD",
            "airport_count": 10
          }
        ]
      },
      {
        "state": "AL",
        "airport_count": 260,
        "by_county": [
          {
            "county": "BALDWIN",
            "airport_count": 25
          },
          {
            "county": "JEFFERSON",
            "airport_count": 19
          },
          {
            "county": "MOBILE",
            "airport_count": 17
          },
          {
            "county": "MADISON",
            "airport_count": 14
          }
        ]
      },
      {
        "state": "MS",
        "airport_count": 243,
        "by_county": [
          {
            "county": "WASHINGTON",
            "airport_count": 15
          },
          {
            "county": "HARRISON",
            "airport_count": 12
          },
          {
            "county": "HINDS",
            "airport_count": 11
          },
          {
            "county": "BOLIVAR",
            "airport_count": 9
          }
        ]
      },
      {
        "state": "KY",
        "airport_count": 202,
        "by_county": [
          {
            "county": "JEFFERSON",
            "airport_count": 13
          },
          {
            "county": "FAYETTE",
            "airport_count": 7
          },
          {
            "county": "MC LEAN",
            "airport_count": 6
          },
          {
            "county": "BOONE",
            "airport_count": 6
          }
        ]
      },
      {
        "state": "SC",
        "airport_count": 189,
        "by_county": [
          {
            "county": "GREENVILLE",
            "airport_count": 13
          },
          {
            "county": "CHARLESTON",
            "airport_count": 9
          },
          {
            "county": "BEAUFORT",
            "airport_count": 9
          },
          {
            "county": "LEXINGTON",
            "airport_count": 9
          }
        ]
      },
      {
        "state": "PR",
        "airport_count": 40,
        "by_county": [
          {
            "county": "--PUERTO RICO",
            "airport_count": 40
          }
        ]
      },
      {
        "state": "VI",
        "airport_count": 9,
        "by_county": [
          {
            "county": "-VIRGIN ISLANDS-",
            "airport_count": 9
          }
        ]
      }
    ],
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 2038
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 770
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 57
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 33
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 17
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 8
      },
      {
        "fac_type": "BALLOONPORT",
        "airport_count": 1
      }
    ]
  },
  {
    "faa_region": "AEA",
    "airport_count": 2586,
    "by_state_and_county": [
      {
        "state": "PA",
        "airport_count": 804,
        "by_county": [
          {
            "county": "BUCKS",
            "airport_count": 55
          },
          {
            "county": "MONTGOMERY",
            "airport_count": 44
          },
          {
            "county": "ALLEGHENY",
            "airport_count": 31
          },
          {
            "county": "CHESTER",
            "airport_count": 27
          }
        ]
      },
      {
        "state": "NY",
        "airport_count": 576,
        "by_county": [
          {
            "county": "SUFFOLK",
            "airport_count": 34
          },
          {
            "county": "ERIE",
            "airport_count": 26
          },
          {
            "county": "NIAGARA",
            "airport_count": 20
          },
          {
            "county": "DUTCHESS",
            "airport_count": 20
          }
        ]
      },
      {
        "state": "VA",
        "airport_count": 421,
        "by_county": [
          {
            "county": "FAUQUIER",
            "airport_count": 23
          },
          {
            "county": "SHENANDOAH",
            "airport_count": 13
          },
          {
            "county": "ACCOMACK",
            "airport_count": 12
          },
          {
            "county": "FAIRFAX",
            "airport_count": 12
          }
        ]
      },
      {
        "state": "NJ",
        "airport_count": 378,
        "by_county": [
          {
            "county": "MONMOUTH",
            "airport_count": 31
          },
          {
            "county": "MIDDLESEX",
            "airport_count": 29
          },
          {
            "county": "BURLINGTON",
            "airport_count": 29
          },
          {
            "county": "HUNTERDON",
            "airport_count": 27
          }
        ]
      },
      {
        "state": "MD",
        "airport_count": 229,
        "by_county": [
          {
            "county": "BALTIMORE",
            "airport_count": 24
          },
          {
            "county": "CARROLL",
            "airport_count": 14
          },
          {
            "county": "ANNE ARUNDEL",
            "airport_count": 14
          },
          {
            "county": "PRINCE GEORGES",
            "airport_count": 14
          }
        ]
      },
      {
        "state": "WV",
        "airport_count": 116,
        "by_county": [
          {
            "county": "KANAWHA",
            "airport_count": 8
          },
          {
            "county": "MASON",
            "airport_count": 7
          },
          {
            "county": "RALEIGH",
            "airport_count": 6
          },
          {
            "county": "PRESTON",
            "airport_count": 6
          }
        ]
      },
      {
        "state": "DE",
        "airport_count": 42,
        "by_county": [
          {
            "county": "KENT",
            "airport_count": 17
          },
          {
            "county": "SUSSEX",
            "airport_count": 14
          },
          {
            "county": "NEW CASTLE",
            "airport_count": 11
          }
        ]
      },
      {
        "state": "DC",
        "airport_count": 20,
        "by_county": [
          {
            "county": "WASHINGTON",
            "airport_count": 18
          },
          {
            "county": "ARLINGTON",
            "airport_count": 1
          },
          {
            "county": "LOUDOUN",
            "airport_count": 1
          }
        ]
      }
    ],
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 1525
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 964
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 61
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 18
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 8
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 7
      },
      {
        "fac_type": "BALLOONPORT",
        "airport_count": 3
      }
    ]
  },
  {
    "faa_region": "ANM",
    "airport_count": 2102,
    "by_state_and_county": [
      {
        "state": "WA",
        "airport_count": 484,
        "by_county": [
          {
            "county": "KING",
            "airport_count": 61
          },
          {
            "county": "PIERCE",
            "airport_count": 27
          },
          {
            "county": "SPOKANE",
            "airport_count": 26
          },
          {
            "county": "SNOHOMISH",
            "airport_count": 25
          }
        ]
      },
      {
        "state": "OR",
        "airport_count": 441,
        "by_county": [
          {
            "county": "CLACKAMAS",
            "airport_count": 34
          },
          {
            "county": "LINN",
            "airport_count": 26
          },
          {
            "county": "LANE",
            "airport_count": 24
          },
          {
            "county": "WASHINGTON",
            "airport_count": 24
          }
        ]
      },
      {
        "state": "CO",
        "airport_count": 425,
        "by_county": [
          {
            "county": "WELD",
            "airport_count": 40
          },
          {
            "county": "EL PASO",
            "airport_count": 26
          },
          {
            "county": "ADAMS",
            "airport_count": 23
          },
          {
            "county": "JEFFERSON",
            "airport_count": 21
          }
        ]
      },
      {
        "state": "MT",
        "airport_count": 259,
        "by_county": [
          {
            "county": "FLATHEAD",
            "airport_count": 20
          },
          {
            "county": "LEWIS AND CLARK",
            "airport_count": 16
          },
          {
            "county": "MISSOULA",
            "airport_count": 14
          },
          {
            "county": "GALLATIN",
            "airport_count": 11
          }
        ]
      },
      {
        "state": "ID",
        "airport_count": 238,
        "by_county": [
          {
            "county": "VALLEY",
            "airport_count": 27
          },
          {
            "county": "KOOTENAI",
            "airport_count": 21
          },
          {
            "county": "IDAHO",
            "airport_count": 18
          },
          {
            "county": "BONNER",
            "airport_count": 18
          }
        ]
      },
      {
        "state": "UT",
        "airport_count": 140,
        "by_county": [
          {
            "county": "SALT LAKE",
            "airport_count": 21
          },
          {
            "county": "SAN JUAN",
            "airport_count": 13
          },
          {
            "county": "UTAH",
            "airport_count": 10
          },
          {
            "county": "IRON",
            "airport_count": 9
          }
        ]
      },
      {
        "state": "WY",
        "airport_count": 115,
        "by_county": [
          {
            "county": "LARAMIE",
            "airport_count": 12
          },
          {
            "county": "PARK",
            "airport_count": 9
          },
          {
            "county": "CAMPBELL",
            "airport_count": 9
          },
          {
            "county": "CARBON",
            "airport_count": 9
          }
        ]
      }
    ],
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 1524
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 527
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 25
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 13
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 8
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 4
      },
      {
        "fac_type": "BALLOONPORT",
        "airport_count": 1
      }
    ]
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    base."faa_region" as "faa_region__0",
    CASE WHEN group_set=0 THEN
      COUNT(1)
      END as "airport_count__0",
    CASE WHEN group_set IN (1,2) THEN
      base."state"
      END as "state__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "airport_count__1",
    CASE WHEN group_set=2 THEN
      base."county"
      END as "county__2",
    CASE WHEN group_set=2 THEN
      COUNT(1)
      END as "airport_count__2",
    CASE WHEN group_set=3 THEN
      base."fac_type"
      END as "fac_type__3",
    CASE WHEN group_set=3 THEN
      COUNT(1)
      END as "airport_count__3"
  FROM '../data/airports.parquet' as base
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,3,1)) as group_set  ) as group_set
  GROUP BY 1,2,4,6,8
)
, __stage1 AS (
  SELECT 
    CASE WHEN group_set=2 THEN 1 ELSE group_set END as group_set,
    "faa_region__0" as "faa_region__0",
    FIRST("airport_count__0") FILTER (WHERE "airport_count__0" IS NOT NULL) as "airport_count__0",
    CASE WHEN group_set IN (1,2) THEN
      "state__1"
      END as "state__1",
    FIRST("airport_count__1") FILTER (WHERE "airport_count__1" IS NOT NULL) as "airport_count__1",
    COALESCE(LIST({
      "county": "county__2", 
      "airport_count": "airport_count__2"}  ORDER BY  "airport_count__2" desc NULLS LAST) FILTER (WHERE group_set=2)[1:4],[]) as "by_county__1",
    CASE WHEN group_set=3 THEN
      "fac_type__3"
      END as "fac_type__3",
    FIRST("airport_count__3") FILTER (WHERE "airport_count__3" IS NOT NULL) as "airport_count__3"
  FROM __stage0
  GROUP BY 1,2,4,7
)
SELECT
  "faa_region__0" as "faa_region",
  MAX(CASE WHEN group_set=0 THEN "airport_count__0" END) as "airport_count",
  COALESCE(LIST({
    "state": "state__1", 
    "airport_count": "airport_count__1", 
    "by_county": "by_county__1"}  ORDER BY  "airport_count__1" desc NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_state_and_county",
  COALESCE(LIST({
    "fac_type": "fac_type__3", 
    "airport_count": "airport_count__3"}  ORDER BY  "airport_count__3" desc NULLS LAST) FILTER (WHERE group_set=3),[]) as "by_facility_type"
FROM __stage1
GROUP BY 1
ORDER BY 2 desc NULLS LAST

See Nested Views for more details.


Refining Views

The + { } gesture lets you extend an existing view with additional query terms.

Add Ordering and Limits

document
run: airports -> by_state + {
  order_by: state desc
  limit: 3
}
QUERY RESULTS
[
  {
    "state": "WY",
    "airport_count": 115
  },
  {
    "state": "WV",
    "airport_count": 116
  },
  {
    "state": "WQ",
    "airport_count": 1
  }
]
SELECT 
   base."state" as "state",
   COUNT(1) as "airport_count"
FROM '../data/airports.parquet' as base
GROUP BY 1
ORDER BY 1 desc NULLS LAST
LIMIT 3

Add Measures

document
run: airports -> by_facility_type + {
  aggregate: avg_elevation
}
QUERY RESULTS
[
  {
    "fac_type": "AIRPORT",
    "airport_count": 13925,
    "avg_elevation": 1237.0441651705567
  },
  {
    "fac_type": "HELIPORT",
    "airport_count": 5135,
    "avg_elevation": 950.5125608568646
  },
  {
    "fac_type": "SEAPLANE BASE",
    "airport_count": 473,
    "avg_elevation": 488.82241014799155
  },
  {
    "fac_type": "ULTRALIGHT",
    "airport_count": 125,
    "avg_elevation": 806.144
  },
  {
    "fac_type": "STOLPORT",
    "airport_count": 86,
    "avg_elevation": 1375.046511627907
  }
]
SELECT 
   base."fac_type" as "fac_type",
   COUNT(1) as "airport_count",
   AVG(base."elevation") as "avg_elevation"
FROM '../data/airports.parquet' as base
GROUP BY 1
ORDER BY 2 desc NULLS LAST

Add Nested Views

document
run: airports -> top_5_states + {
  nest: by_facility_type
}
QUERY RESULTS
[
  {
    "state": "TX",
    "airport_count": 1845,
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 1389
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 435
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 8
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 8
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 5
      }
    ]
  },
  {
    "state": "CA",
    "airport_count": 984,
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 569
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 396
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 12
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 3
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 2
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 2
      }
    ]
  },
  {
    "state": "IL",
    "airport_count": 890,
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 625
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 245
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 8
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 6
      },
      {
        "fac_type": "BALLOONPORT",
        "airport_count": 2
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 2
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 2
      }
    ]
  },
  {
    "state": "FL",
    "airport_count": 856,
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 511
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 280
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 43
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 13
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 5
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 4
      }
    ]
  },
  {
    "state": "PA",
    "airport_count": 804,
    "by_facility_type": [
      {
        "fac_type": "AIRPORT",
        "airport_count": 468
      },
      {
        "fac_type": "HELIPORT",
        "airport_count": 307
      },
      {
        "fac_type": "ULTRALIGHT",
        "airport_count": 13
      },
      {
        "fac_type": "SEAPLANE BASE",
        "airport_count": 10
      },
      {
        "fac_type": "GLIDERPORT",
        "airport_count": 3
      },
      {
        "fac_type": "STOLPORT",
        "airport_count": 3
      }
    ]
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    base."state" as "state__0",
    CASE WHEN group_set=0 THEN
      COUNT(1)
      END as "airport_count__0",
    CASE WHEN group_set=1 THEN
      base."fac_type"
      END as "fac_type__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "airport_count__1"
  FROM '../data/airports.parquet' as base
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  GROUP BY 1,2,4
)
SELECT
  "state__0" as "state",
  MAX(CASE WHEN group_set=0 THEN "airport_count__0" END) as "airport_count",
  COALESCE(LIST({
    "fac_type": "fac_type__1", 
    "airport_count": "airport_count__1"}  ORDER BY  "airport_count__1" desc NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_facility_type"
FROM __stage0
GROUP BY 1
ORDER BY 2 desc NULLS LAST
LIMIT 5

Swapping inner and outer reveals different insights:

document
run: airports -> by_facility_type + {
  nest: top_5_states
}
QUERY RESULTS
[
  {
    "fac_type": "AIRPORT",
    "airport_count": 13925,
    "top_5_states": [
      {
        "state": "TX",
        "airport_count": 1389
      },
      {
        "state": "IL",
        "airport_count": 625
      },
      {
        "state": "CA",
        "airport_count": 569
      },
      {
        "state": "OH",
        "airport_count": 537
      },
      {
        "state": "FL",
        "airport_count": 511
      }
    ]
  },
  {
    "fac_type": "HELIPORT",
    "airport_count": 5135,
    "top_5_states": [
      {
        "state": "TX",
        "airport_count": 435
      },
      {
        "state": "CA",
        "airport_count": 396
      },
      {
        "state": "PA",
        "airport_count": 307
      },
      {
        "state": "FL",
        "airport_count": 280
      },
      {
        "state": "NJ",
        "airport_count": 247
      }
    ]
  },
  {
    "fac_type": "SEAPLANE BASE",
    "airport_count": 473,
    "top_5_states": [
      {
        "state": "AK",
        "airport_count": 104
      },
      {
        "state": "MN",
        "airport_count": 72
      },
      {
        "state": "FL",
        "airport_count": 43
      },
      {
        "state": "ME",
        "airport_count": 38
      },
      {
        "state": "NY",
        "airport_count": 23
      }
    ]
  },
  {
    "fac_type": "ULTRALIGHT",
    "airport_count": 125,
    "top_5_states": [
      {
        "state": "LA",
        "airport_count": 18
      },
      {
        "state": "IN",
        "airport_count": 17
      },
      {
        "state": "PA",
        "airport_count": 13
      },
      {
        "state": "TX",
        "airport_count": 8
      },
      {
        "state": "AZ",
        "airport_count": 7
      }
    ]
  },
  {
    "fac_type": "STOLPORT",
    "airport_count": 86,
    "top_5_states": [
      {
        "state": "FL",
        "airport_count": 13
      },
      {
        "state": "TN",
        "airport_count": 9
      },
      {
        "state": "TX",
        "airport_count": 8
      },
      {
        "state": "CO",
        "airport_count": 6
      },
      {
        "state": "NC",
        "airport_count": 4
      }
    ]
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    base."fac_type" as "fac_type__0",
    CASE WHEN group_set=0 THEN
      COUNT(1)
      END as "airport_count__0",
    CASE WHEN group_set=1 THEN
      base."state"
      END as "state__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "airport_count__1"
  FROM '../data/airports.parquet' as base
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  GROUP BY 1,2,4
)
SELECT
  "fac_type__0" as "fac_type",
  MAX(CASE WHEN group_set=0 THEN "airport_count__0" END) as "airport_count",
  COALESCE(LIST({
    "state": "state__1", 
    "airport_count": "airport_count__1"}  ORDER BY  "airport_count__1" desc NULLS LAST) FILTER (WHERE group_set=1)[1:5],[]) as "top_5_states"
FROM __stage0
GROUP BY 1
ORDER BY 2 desc NULLS LAST

Aggregates Across the Join Tree

Malloy computes aggregates correctly at any node in a join graph—something difficult in SQL.

Setup: Multi-level Joins

document
source: carriers2 is duckdb.table('../data/carriers.parquet') extend {
  primary_key: code
  measure: carrier_count is count()
}

source: flights2 is duckdb.table('../data/flights.parquet') extend {
  join_one: carriers2 with carrier

  measure:
    flight_count is count()
    total_distance is distance.sum()
    avg_distance is distance.avg()
}

source: airports2 is duckdb.table('../data/airports.parquet') extend {
  join_many: flights2 on code = flights2.origin

  measure: airport_count is count()
  measure: avg_elevation is elevation.avg()
}

Aggregate at Any Level

This query aggregates across three levels of joins—something very difficult in SQL:

document
run: airports2 -> {
  group_by: state
  aggregate:
    flights2.carriers2.carrier_count  // 3 levels deep
    flights2.flight_count
    flights2.total_distance
    airport_count
    avg_elevation
}
QUERY RESULTS
[
  {
    "state": "TX",
    "carrier_count": 14,
    "flight_count": 40085,
    "total_distance": 24171182,
    "airport_count": 1845,
    "avg_elevation": 898.2921409214092
  },
  {
    "state": "CA",
    "carrier_count": 14,
    "flight_count": 40670,
    "total_distance": 37690414,
    "airport_count": 984,
    "avg_elevation": 1087.7073170731708
  },
  {
    "state": "IN",
    "carrier_count": 13,
    "flight_count": 2324,
    "total_distance": 1495747,
    "airport_count": 643,
    "avg_elevation": 759.5412130637636
  },
  {
    "state": "LA",
    "carrier_count": 13,
    "flight_count": 4246,
    "total_distance": 2644460,
    "airport_count": 500,
    "avg_elevation": 64.47
  },
  {
    "state": "OH",
    "carrier_count": 12,
    "flight_count": 7327,
    "total_distance": 3562086,
    "airport_count": 749,
    "avg_elevation": 916.9866488651535
  }
]
SELECT 
   base."state" as "state",
   COUNT(DISTINCT carriers2_0."code") as "carrier_count",
   COUNT(DISTINCT flights2_0."__distinct_key") as "flight_count",
   COALESCE(SUM(flights2_0."distance"),0) as "total_distance",
   COUNT(DISTINCT base."__distinct_key") as "airport_count",
   (
        SELECT AVG(a.val) as value
        FROM (
          SELECT UNNEST(list(distinct {key:base."__distinct_key", val: base."elevation"})) a
        )
      ) as "avg_elevation"
FROM (SELECT GEN_RANDOM_UUID() as "__distinct_key", x.*  FROM '../data/airports.parquet' as x) as base
 LEFT JOIN (SELECT GEN_RANDOM_UUID() as "__distinct_key", x.*  FROM '../data/flights.parquet' as x) AS flights2_0
  ON base."code"=flights2_0."origin"
 LEFT JOIN '../data/carriers.parquet' AS carriers2_0
  ON carriers2_0."code"=flights2_0."carrier"
GROUP BY 1
ORDER BY 2 desc NULLS LAST

See Aggregates for details on symmetric aggregates.


Pipelines

Chain queries together—the output of one becomes the input of the next.

Basic Pipeline

First, a nested query:

document
source: airports3 is duckdb.table('../data/airports.parquet') extend {
  measure: airport_count is count()
}

run: airports3 -> {
  where: fac_type = 'HELIPORT'
  group_by: state
  aggregate: airport_count
  nest: top_3_county is {
    limit: 3
    group_by: county
    aggregate: airport_count
  }
}
QUERY RESULTS
[
  {
    "state": "TX",
    "airport_count": 435,
    "top_3_county": [
      {
        "county": "HARRIS",
        "airport_count": 110
      },
      {
        "county": "TARRANT",
        "airport_count": 35
      },
      {
        "county": "DALLAS",
        "airport_count": 32
      }
    ]
  },
  {
    "state": "CA",
    "airport_count": 396,
    "top_3_county": [
      {
        "county": "LOS ANGELES",
        "airport_count": 151
      },
      {
        "county": "ORANGE",
        "airport_count": 47
      },
      {
        "county": "SAN BERNARDINO",
        "airport_count": 24
      }
    ]
  },
  {
    "state": "PA",
    "airport_count": 307,
    "top_3_county": [
      {
        "county": "MONTGOMERY",
        "airport_count": 29
      },
      {
        "county": "PHILADELPHIA",
        "airport_count": 22
      },
      {
        "county": "ALLEGHENY",
        "airport_count": 22
      }
    ]
  },
  {
    "state": "FL",
    "airport_count": 280,
    "top_3_county": [
      {
        "county": "PALM BEACH",
        "airport_count": 30
      },
      {
        "county": "DADE",
        "airport_count": 27
      },
      {
        "county": "ORANGE",
        "airport_count": 24
      }
    ]
  },
  {
    "state": "NJ",
    "airport_count": 247,
    "top_3_county": [
      {
        "county": "MIDDLESEX",
        "airport_count": 26
      },
      {
        "county": "SOMERSET",
        "airport_count": 23
      },
      {
        "county": "MONMOUTH",
        "airport_count": 23
      }
    ]
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    base."state" as "state__0",
    CASE WHEN group_set=0 THEN
      COUNT(1)
      END as "airport_count__0",
    CASE WHEN group_set=1 THEN
      base."county"
      END as "county__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "airport_count__1"
  FROM '../data/airports.parquet' as base
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  WHERE base."fac_type"='HELIPORT'
  GROUP BY 1,2,4
)
SELECT
  "state__0" as "state",
  MAX(CASE WHEN group_set=0 THEN "airport_count__0" END) as "airport_count",
  COALESCE(LIST({
    "county": "county__1", 
    "airport_count": "airport_count__1"}  ORDER BY  "airport_count__1" desc NULLS LAST) FILTER (WHERE group_set=1)[1:3],[]) as "top_3_county"
FROM __stage0
GROUP BY 1
ORDER BY 2 desc NULLS LAST

Un-nesting with a Pipeline

Chain another query to flatten the nested results:

document
run: airports3 -> {
  where: fac_type = 'HELIPORT'
  group_by: state
  aggregate: airport_count
  nest: top_3_county is {
    limit: 3
    group_by: county
    aggregate: airport_count
  }
} -> {
  select:
    state
    top_3_county.county
    airports_in_state is airport_count
    airports_in_county is top_3_county.airport_count
    percent_of_state is top_3_county.airport_count / airport_count
}
QUERY RESULTS
[
  {
    "state": "CQ",
    "county": "N MARIANA ISLANDS",
    "airports_in_state": 7,
    "airports_in_county": 7,
    "percent_of_state": 1
  },
  {
    "state": "IN",
    "county": "MARION",
    "airports_in_state": 115,
    "airports_in_county": 20,
    "percent_of_state": 0.17391304347826086
  },
  {
    "state": "IN",
    "county": "LAKE",
    "airports_in_state": 115,
    "airports_in_county": 7,
    "percent_of_state": 0.06086956521739131
  },
  {
    "state": "IN",
    "county": "PORTER",
    "airports_in_state": 115,
    "airports_in_county": 5,
    "percent_of_state": 0.043478260869565216
  },
  {
    "state": "IL",
    "county": "COOK",
    "airports_in_state": 245,
    "airports_in_county": 44,
    "percent_of_state": 0.17959183673469387
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    base."state" as "state__0",
    CASE WHEN group_set=0 THEN
      COUNT(1)
      END as "airport_count__0",
    CASE WHEN group_set=1 THEN
      base."county"
      END as "county__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "airport_count__1"
  FROM '../data/airports.parquet' as base
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  WHERE base."fac_type"='HELIPORT'
  GROUP BY 1,2,4
)
, __stage1 AS (
  SELECT
    "state__0" as "state",
    MAX(CASE WHEN group_set=0 THEN "airport_count__0" END) as "airport_count",
    COALESCE(LIST({
      "county": "county__1", 
      "airport_count": "airport_count__1"}  ORDER BY  "airport_count__1" desc NULLS LAST) FILTER (WHERE group_set=1)[1:3],[]) as "top_3_county"
  FROM __stage0
  GROUP BY 1
)
SELECT 
   base."state" as "state",
   top_3_county_0."county" as "county",
   base."airport_count" as "airports_in_state",
   top_3_county_0."airport_count" as "airports_in_county",
   top_3_county_0."airport_count"*1.0/base."airport_count" as "percent_of_state"
FROM __stage1 as base
LEFT JOIN LATERAL (SELECT UNNEST(base."top_3_county"), 1 as ignoreme) as top_3_county_0_outer(top_3_county_0,ignoreme) ON top_3_county_0_outer.ignoreme=1

Pipelines as Views

Complex pipelines can be saved as views:

document
source: airports4 is duckdb.table('../data/airports.parquet') extend {
  measure: airport_count is count()

  view: county_rollup is {
    where: fac_type = 'HELIPORT'
    group_by: state
    aggregate: airport_count
    nest: top_3_county is {
      limit: 3
      group_by: county
      aggregate: airport_count
    }
  } -> {
    select:
      state
      top_3_county.county
      airports_in_state is airport_count
      airports_in_county is top_3_county.airport_count
      percent_of_state is top_3_county.airport_count / airport_count
  }
}

run: airports4 -> county_rollup
QUERY RESULTS
[
  {
    "state": "IL",
    "county": "COOK",
    "airports_in_state": 245,
    "airports_in_county": 44,
    "percent_of_state": 0.17959183673469387
  },
  {
    "state": "IL",
    "county": "DU PAGE",
    "airports_in_state": 245,
    "airports_in_county": 17,
    "percent_of_state": 0.06938775510204082
  },
  {
    "state": "IL",
    "county": "LAKE",
    "airports_in_state": 245,
    "airports_in_county": 12,
    "percent_of_state": 0.04897959183673469
  },
  {
    "state": "IN",
    "county": "MARION",
    "airports_in_state": 115,
    "airports_in_county": 20,
    "percent_of_state": 0.17391304347826086
  },
  {
    "state": "IN",
    "county": "LAKE",
    "airports_in_state": 115,
    "airports_in_county": 7,
    "percent_of_state": 0.06086956521739131
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    base."state" as "state__0",
    CASE WHEN group_set=0 THEN
      COUNT(1)
      END as "airport_count__0",
    CASE WHEN group_set=1 THEN
      base."county"
      END as "county__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "airport_count__1"
  FROM '../data/airports.parquet' as base
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  WHERE base."fac_type"='HELIPORT'
  GROUP BY 1,2,4
)
, __stage1 AS (
  SELECT
    "state__0" as "state",
    MAX(CASE WHEN group_set=0 THEN "airport_count__0" END) as "airport_count",
    COALESCE(LIST({
      "county": "county__1", 
      "airport_count": "airport_count__1"}  ORDER BY  "airport_count__1" desc NULLS LAST) FILTER (WHERE group_set=1)[1:3],[]) as "top_3_county"
  FROM __stage0
  GROUP BY 1
)
SELECT 
   base."state" as "state",
   top_3_county_0."county" as "county",
   base."airport_count" as "airports_in_state",
   top_3_county_0."airport_count" as "airports_in_county",
   top_3_county_0."airport_count"*1.0/base."airport_count" as "percent_of_state"
FROM __stage1 as base
LEFT JOIN LATERAL (SELECT UNNEST(base."top_3_county"), 1 as ignoreme) as top_3_county_0_outer(top_3_county_0,ignoreme) ON top_3_county_0_outer.ignoreme=1

Extending Sources

Create new sources based on existing ones with extend:

source: california_airports is airports extend {
  where: state = 'CA'
  measure: ca_airport_count is count()
}

The new source inherits all fields from the original and adds new constraints or calculations.


Sources Based on Queries

Pre-process data with a query, then build a source on top of it.

Define a Model-Level Query

document
query: airport_facts_query is duckdb.table('../data/flights.parquet') -> {
  group_by:
    flight_year is dep_time.year
    origin
    carrier
  aggregate:
    num_flights is count()
    total_distance is distance.sum()
}

run: airport_facts_query
QUERY RESULTS
[
  {
    "flight_year": "2005-01-01T00:00:00.000Z",
    "origin": "LBB",
    "carrier": "WN",
    "num_flights": 39,
    "total_distance": 14747
  },
  {
    "flight_year": "2005-01-01T00:00:00.000Z",
    "origin": "DFW",
    "carrier": "AA",
    "num_flights": 1585,
    "total_distance": 1440360
  },
  {
    "flight_year": "2005-01-01T00:00:00.000Z",
    "origin": "PHX",
    "carrier": "HP",
    "num_flights": 528,
    "total_distance": 375489
  },
  {
    "flight_year": "2005-01-01T00:00:00.000Z",
    "origin": "PDX",
    "carrier": "WN",
    "num_flights": 229,
    "total_distance": 137081
  },
  {
    "flight_year": "2005-01-01T00:00:00.000Z",
    "origin": "MDW",
    "carrier": "WN",
    "num_flights": 1175,
    "total_distance": 1011092
  }
]
SELECT 
   DATE_TRUNC('year', base."dep_time") as "flight_year",
   base."origin" as "origin",
   base."carrier" as "carrier",
   COUNT(1) as "num_flights",
   COALESCE(SUM(base."distance"),0) as "total_distance"
FROM '../data/flights.parquet' as base
GROUP BY 1,2,3
ORDER BY 1 desc NULLS LAST

Build a Source on the Query

document
source: airport_facts is airport_facts_query extend {
  measure: flight_count is num_flights.sum()
  measure: distance is total_distance.sum()

  view: by_year is {
    group_by: flight_year
    aggregate:
      flight_count
      carrier_count is count(carrier)
      origin_count is count(origin)
  }

  view: by_origin is {
    group_by: origin
    aggregate:
      flight_count
      carrier_count is count(carrier)
  }
}

run: airport_facts -> by_origin
QUERY RESULTS
[
  {
    "origin": "ATL",
    "flight_count": 17875,
    "carrier_count": 12
  },
  {
    "origin": "DFW",
    "flight_count": 17782,
    "carrier_count": 13
  },
  {
    "origin": "ORD",
    "flight_count": 14214,
    "carrier_count": 10
  },
  {
    "origin": "PHX",
    "flight_count": 12476,
    "carrier_count": 12
  },
  {
    "origin": "LAS",
    "flight_count": 11096,
    "carrier_count": 10
  }
]
WITH __stage0 AS (
  SELECT 
     DATE_TRUNC('year', base."dep_time") as "flight_year",
     base."origin" as "origin",
     base."carrier" as "carrier",
     COUNT(1) as "num_flights",
     COALESCE(SUM(base."distance"),0) as "total_distance"
  FROM '../data/flights.parquet' as base
  GROUP BY 1,2,3
)
SELECT 
   base."origin" as "origin",
   COALESCE(SUM(base."num_flights"),0) as "flight_count",
   count(distinct base."carrier") as "carrier_count"
FROM __stage0 as base
GROUP BY 1
ORDER BY 2 desc NULLS LAST

Learn More