Malloy Documentation
search

By examining the range of values over a dataset, we can compute the appropriate histogram bin size, while capturing the data at the same time. We can then pipe the output to another query to display a histogram.

document
source: airports is duckdb.table('../data/airports.parquet') extend {
  measure: airport_count is count()
  # bar_chart
  view: by_elevation is {
    aggregate: bin_size is (max(elevation) - min(elevation)) / 30
    nest: data is  {
      group_by: elevation
      aggregate: row_count is count()
    }
  } -> {
    group_by: elevation is 
      floor(data.elevation / bin_size) * nullif(bin_size, 0) + bin_size / 2
    aggregate: airport_count is data.row_count.sum()
    order_by: elevation
  }
}

Overall elevation distribution

The query can be used to show the overall distribution of the data.

We are showing the bin_size in this exmaple for clarity.

document
run: airports -> by_elevation
QUERY RESULTS
[
  {
    "elevation": -210.86666666666667,
    "airport_count": 12
  },
  {
    "elevation": 210.86666666666667,
    "airport_count": 6230
  },
  {
    "elevation": 632.6,
    "airport_count": 5015
  },
  {
    "elevation": 1054.3333333333335,
    "airport_count": 3858
  },
  {
    "elevation": 1476.0666666666666,
    "airport_count": 1362
  },
  {
    "elevation": 1897.8000000000002,
    "airport_count": 625
  },
  {
    "elevation": 2319.5333333333338,
    "airport_count": 446
  },
  {
    "elevation": 2741.266666666667,
    "airport_count": 333
  },
  {
    "elevation": 3163,
    "airport_count": 244
  },
  {
    "elevation": 3584.7333333333336,
    "airport_count": 227
  },
  {
    "elevation": 4006.466666666667,
    "airport_count": 225
  },
  {
    "elevation": 4428.200000000001,
    "airport_count": 240
  },
  {
    "elevation": 4849.933333333333,
    "airport_count": 248
  },
  {
    "elevation": 5271.666666666667,
    "airport_count": 178
  },
  {
    "elevation": 5693.400000000001,
    "airport_count": 128
  },
  {
    "elevation": 6115.133333333333,
    "airport_count": 108
  },
  {
    "elevation": 6536.866666666667,
    "airport_count": 96
  },
  {
    "elevation": 6958.6,
    "airport_count": 70
  },
  {
    "elevation": 7380.333333333334,
    "airport_count": 40
  },
  {
    "elevation": 7802.0666666666675,
    "airport_count": 33
  },
  {
    "elevation": 8223.8,
    "airport_count": 23
  },
  {
    "elevation": 8645.533333333335,
    "airport_count": 12
  },
  {
    "elevation": 9067.266666666666,
    "airport_count": 10
  },
  {
    "elevation": 9489,
    "airport_count": 8
  },
  {
    "elevation": 9910.733333333334,
    "airport_count": 11
  },
  {
    "elevation": 10332.466666666667,
    "airport_count": 3
  },
  {
    "elevation": 10754.2,
    "airport_count": 5
  },
  {
    "elevation": 11175.933333333334,
    "airport_count": 2
  },
  {
    "elevation": 12441.133333333333,
    "airport_count": 1
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    ((CASE WHEN group_set=0 THEN
      max(base."elevation")
      END-CASE WHEN group_set=0 THEN
      min(base."elevation")
      END))*1.0/30 as "bin_size__0",
    CASE WHEN group_set=1 THEN
      base."elevation"
      END as "elevation__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "row_count__1"
  FROM '../data/airports.parquet' as base
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  GROUP BY 1,3
)
, __stage1 AS (
  SELECT
    MAX(CASE WHEN group_set=0 THEN "bin_size__0" END) as "bin_size",
    COALESCE(LIST({
      "elevation": "elevation__1", 
      "row_count": "row_count__1"}  ORDER BY  "row_count__1" desc NULLS LAST) FILTER (WHERE group_set=1),[]) as "data"
  FROM __stage0
)
SELECT 
   ((FLOOR(data_0."elevation"*1.0/base."bin_size"))*(NULLIF(base."bin_size",0)))+(base."bin_size"*1.0/2) as "elevation",
   COALESCE(SUM(data_0."row_count"),0) as "airport_count"
FROM __stage1 as base
LEFT JOIN LATERAL (SELECT UNNEST(base."data"), 1 as ignoreme) as data_0_outer(data_0,ignoreme) ON data_0_outer.ignoreme=1
GROUP BY 1
ORDER BY 1 ASC NULLS LAST

Distribution Adapts Automatically

Notice that when we look at Florida, the bin-width is different.

document
run: airports extend { where: state = 'FL' } -> by_elevation
QUERY RESULTS
[
  {
    "elevation": 5.566666666666666,
    "airport_count": 137
  },
  {
    "elevation": 16.7,
    "airport_count": 134
  },
  {
    "elevation": 27.833333333333332,
    "airport_count": 87
  },
  {
    "elevation": 38.96666666666667,
    "airport_count": 40
  },
  {
    "elevation": 50.099999999999994,
    "airport_count": 55
  },
  {
    "elevation": 61.233333333333334,
    "airport_count": 45
  },
  {
    "elevation": 72.36666666666666,
    "airport_count": 50
  },
  {
    "elevation": 83.5,
    "airport_count": 46
  },
  {
    "elevation": 94.63333333333333,
    "airport_count": 57
  },
  {
    "elevation": 105.76666666666665,
    "airport_count": 31
  },
  {
    "elevation": 116.89999999999999,
    "airport_count": 32
  },
  {
    "elevation": 128.03333333333333,
    "airport_count": 25
  },
  {
    "elevation": 139.16666666666666,
    "airport_count": 16
  },
  {
    "elevation": 150.29999999999998,
    "airport_count": 27
  },
  {
    "elevation": 161.43333333333334,
    "airport_count": 13
  },
  {
    "elevation": 172.56666666666666,
    "airport_count": 6
  },
  {
    "elevation": 183.7,
    "airport_count": 5
  },
  {
    "elevation": 194.83333333333331,
    "airport_count": 14
  },
  {
    "elevation": 205.96666666666664,
    "airport_count": 4
  },
  {
    "elevation": 217.1,
    "airport_count": 6
  },
  {
    "elevation": 228.23333333333332,
    "airport_count": 3
  },
  {
    "elevation": 239.36666666666665,
    "airport_count": 4
  },
  {
    "elevation": 250.5,
    "airport_count": 6
  },
  {
    "elevation": 261.6333333333333,
    "airport_count": 3
  },
  {
    "elevation": 272.76666666666665,
    "airport_count": 2
  },
  {
    "elevation": 283.9,
    "airport_count": 3
  },
  {
    "elevation": 295.0333333333333,
    "airport_count": 3
  },
  {
    "elevation": 328.43333333333334,
    "airport_count": 1
  },
  {
    "elevation": 339.56666666666666,
    "airport_count": 1
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    ((CASE WHEN group_set=0 THEN
      max(base."elevation")
      END-CASE WHEN group_set=0 THEN
      min(base."elevation")
      END))*1.0/30 as "bin_size__0",
    CASE WHEN group_set=1 THEN
      base."elevation"
      END as "elevation__1",
    CASE WHEN group_set=1 THEN
      COUNT(1)
      END as "row_count__1"
  FROM '../data/airports.parquet' as base
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  WHERE (base."state"='FL')
  AND ((group_set NOT IN (1) OR (group_set IN (1) AND base."state"='FL')))
  GROUP BY 1,3
)
, __stage1 AS (
  SELECT
    MAX(CASE WHEN group_set=0 THEN "bin_size__0" END) as "bin_size",
    COALESCE(LIST({
      "elevation": "elevation__1", 
      "row_count": "row_count__1"}  ORDER BY  "row_count__1" desc NULLS LAST) FILTER (WHERE group_set=1),[]) as "data"
  FROM __stage0
)
SELECT 
   ((FLOOR(data_0."elevation"*1.0/base."bin_size"))*(NULLIF(base."bin_size",0)))+(base."bin_size"*1.0/2) as "elevation",
   COALESCE(SUM(data_0."row_count"),0) as "airport_count"
FROM __stage1 as base
LEFT JOIN LATERAL (SELECT UNNEST(base."data"), 1 as ignoreme) as data_0_outer(data_0,ignoreme) ON data_0_outer.ignoreme=1
GROUP BY 1
ORDER BY 1 ASC NULLS LAST

Elevation within states

This binning even adapts when the queries are nested.

Notice that all the binning is local to the individual states. For example the bottom bin in Colorado starts at an elevation of 3000ish feet.

document
run: airports -> {
  group_by: state is state
  aggregate: airport_count
  nest: by_elevation
}
QUERY RESULTS
[
  {
    "state": "TX",
    "airport_count": 1845,
    "by_elevation": [
      {
        "elevation": 83.26666666666667,
        "airport_count": 423
      },
      {
        "elevation": 249.8,
        "airport_count": 127
      },
      {
        "elevation": 416.3333333333333,
        "airport_count": 197
      },
      {
        "elevation": 582.8666666666667,
        "airport_count": 284
      },
      {
        "elevation": 749.4,
        "airport_count": 213
      },
      {
        "elevation": 915.9333333333333,
        "airport_count": 112
      },
      {
        "elevation": 1082.4666666666667,
        "airport_count": 68
      },
      {
        "elevation": 1249,
        "airport_count": 42
      },
      {
        "elevation": 1415.5333333333333,
        "airport_count": 49
      },
      {
        "elevation": 1582.0666666666666,
        "airport_count": 31
      },
      {
        "elevation": 1748.6,
        "airport_count": 42
      },
      {
        "elevation": 1915.1333333333332,
        "airport_count": 24
      },
      {
        "elevation": 2081.666666666667,
        "airport_count": 16
      },
      {
        "elevation": 2248.2000000000003,
        "airport_count": 18
      },
      {
        "elevation": 2414.7333333333336,
        "airport_count": 20
      },
      {
        "elevation": 2581.266666666667,
        "airport_count": 17
      },
      {
        "elevation": 2747.8,
        "airport_count": 16
      },
      {
        "elevation": 2914.3333333333335,
        "airport_count": 10
      },
      {
        "elevation": 3080.866666666667,
        "airport_count": 17
      },
      {
        "elevation": 3247.4,
        "airport_count": 25
      },
      {
        "elevation": 3413.9333333333334,
        "airport_count": 17
      },
      {
        "elevation": 3580.4666666666667,
        "airport_count": 22
      },
      {
        "elevation": 3747,
        "airport_count": 23
      },
      {
        "elevation": 3913.5333333333333,
        "airport_count": 12
      },
      {
        "elevation": 4080.066666666667,
        "airport_count": 4
      },
      {
        "elevation": 4246.599999999999,
        "airport_count": 2
      },
      {
        "elevation": 4413.133333333333,
        "airport_count": 4
      },
      {
        "elevation": 4579.666666666666,
        "airport_count": 6
      },
      {
        "elevation": 4746.2,
        "airport_count": 2
      },
      {
        "elevation": 4912.733333333333,
        "airport_count": 1
      },
      {
        "elevation": 5079.266666666666,
        "airport_count": 1
      }
    ]
  },
  {
    "state": "CA",
    "airport_count": 984,
    "by_elevation": [
      {
        "elevation": -122.3,
        "airport_count": 10
      },
      {
        "elevation": 122.3,
        "airport_count": 346
      },
      {
        "elevation": 366.9,
        "airport_count": 169
      },
      {
        "elevation": 611.5,
        "airport_count": 73
      },
      {
        "elevation": 856.0999999999999,
        "airport_count": 57
      },
      {
        "elevation": 1100.7,
        "airport_count": 44
      },
      {
        "elevation": 1345.3,
        "airport_count": 37
      },
      {
        "elevation": 1589.8999999999999,
        "airport_count": 26
      },
      {
        "elevation": 1834.5,
        "airport_count": 16
      },
      {
        "elevation": 2079.1,
        "airport_count": 13
      },
      {
        "elevation": 2323.7000000000003,
        "airport_count": 31
      },
      {
        "elevation": 2568.3,
        "airport_count": 32
      },
      {
        "elevation": 2812.9,
        "airport_count": 23
      },
      {
        "elevation": 3057.5,
        "airport_count": 17
      },
      {
        "elevation": 3302.1,
        "airport_count": 15
      },
      {
        "elevation": 3546.7000000000003,
        "airport_count": 7
      },
      {
        "elevation": 3791.3,
        "airport_count": 8
      },
      {
        "elevation": 4035.9,
        "airport_count": 10
      },
      {
        "elevation": 4280.5,
        "airport_count": 8
      },
      {
        "elevation": 4525.1,
        "airport_count": 9
      },
      {
        "elevation": 5014.3,
        "airport_count": 7
      },
      {
        "elevation": 5258.9,
        "airport_count": 4
      },
      {
        "elevation": 5503.5,
        "airport_count": 1
      },
      {
        "elevation": 5748.1,
        "airport_count": 4
      },
      {
        "elevation": 5992.7,
        "airport_count": 1
      },
      {
        "elevation": 6237.3,
        "airport_count": 4
      },
      {
        "elevation": 6481.9,
        "airport_count": 3
      },
      {
        "elevation": 6726.5,
        "airport_count": 6
      },
      {
        "elevation": 6971.1,
        "airport_count": 2
      },
      {
        "elevation": 7215.7,
        "airport_count": 1
      }
    ]
  },
  {
    "state": "IL",
    "airport_count": 890,
    "by_elevation": [
      {
        "elevation": 310,
        "airport_count": 1
      },
      {
        "elevation": 334.8,
        "airport_count": 5
      },
      {
        "elevation": 359.59999999999997,
        "airport_count": 4
      },
      {
        "elevation": 384.4,
        "airport_count": 5
      },
      {
        "elevation": 409.2,
        "airport_count": 15
      },
      {
        "elevation": 434,
        "airport_count": 26
      },
      {
        "elevation": 458.8,
        "airport_count": 30
      },
      {
        "elevation": 483.59999999999997,
        "airport_count": 14
      },
      {
        "elevation": 508.4,
        "airport_count": 17
      },
      {
        "elevation": 533.2,
        "airport_count": 22
      },
      {
        "elevation": 558,
        "airport_count": 22
      },
      {
        "elevation": 582.8,
        "airport_count": 47
      },
      {
        "elevation": 607.6,
        "airport_count": 52
      },
      {
        "elevation": 632.4,
        "airport_count": 79
      },
      {
        "elevation": 657.2,
        "airport_count": 90
      },
      {
        "elevation": 682,
        "airport_count": 67
      },
      {
        "elevation": 706.8,
        "airport_count": 69
      },
      {
        "elevation": 731.6,
        "airport_count": 71
      },
      {
        "elevation": 756.4,
        "airport_count": 50
      },
      {
        "elevation": 781.2,
        "airport_count": 46
      },
      {
        "elevation": 806,
        "airport_count": 41
      },
      {
        "elevation": 830.8,
        "airport_count": 26
      },
      {
        "elevation": 855.6,
        "airport_count": 27
      },
      {
        "elevation": 880.4,
        "airport_count": 20
      },
      {
        "elevation": 905.2,
        "airport_count": 17
      },
      {
        "elevation": 930,
        "airport_count": 9
      },
      {
        "elevation": 954.8,
        "airport_count": 8
      },
      {
        "elevation": 979.6,
        "airport_count": 7
      },
      {
        "elevation": 1004.4,
        "airport_count": 1
      },
      {
        "elevation": 1054.0000000000002,
        "airport_count": 2
      }
    ]
  },
  {
    "state": "FL",
    "airport_count": 856,
    "by_elevation": [
      {
        "elevation": 5.566666666666666,
        "airport_count": 137
      },
      {
        "elevation": 16.7,
        "airport_count": 134
      },
      {
        "elevation": 27.833333333333332,
        "airport_count": 87
      },
      {
        "elevation": 38.96666666666667,
        "airport_count": 40
      },
      {
        "elevation": 50.099999999999994,
        "airport_count": 55
      },
      {
        "elevation": 61.233333333333334,
        "airport_count": 45
      },
      {
        "elevation": 72.36666666666666,
        "airport_count": 50
      },
      {
        "elevation": 83.5,
        "airport_count": 46
      },
      {
        "elevation": 94.63333333333333,
        "airport_count": 57
      },
      {
        "elevation": 105.76666666666665,
        "airport_count": 31
      },
      {
        "elevation": 116.89999999999999,
        "airport_count": 32
      },
      {
        "elevation": 128.03333333333333,
        "airport_count": 25
      },
      {
        "elevation": 139.16666666666666,
        "airport_count": 16
      },
      {
        "elevation": 150.29999999999998,
        "airport_count": 27
      },
      {
        "elevation": 161.43333333333334,
        "airport_count": 13
      },
      {
        "elevation": 172.56666666666666,
        "airport_count": 6
      },
      {
        "elevation": 183.7,
        "airport_count": 5
      },
      {
        "elevation": 194.83333333333331,
        "airport_count": 14
      },
      {
        "elevation": 205.96666666666664,
        "airport_count": 4
      },
      {
        "elevation": 217.1,
        "airport_count": 6
      },
      {
        "elevation": 228.23333333333332,
        "airport_count": 3
      },
      {
        "elevation": 239.36666666666665,
        "airport_count": 4
      },
      {
        "elevation": 250.5,
        "airport_count": 6
      },
      {
        "elevation": 261.6333333333333,
        "airport_count": 3
      },
      {
        "elevation": 272.76666666666665,
        "airport_count": 2
      },
      {
        "elevation": 283.9,
        "airport_count": 3
      },
      {
        "elevation": 295.0333333333333,
        "airport_count": 3
      },
      {
        "elevation": 328.43333333333334,
        "airport_count": 1
      },
      {
        "elevation": 339.56666666666666,
        "airport_count": 1
      }
    ]
  },
  {
    "state": "PA",
    "airport_count": 804,
    "by_elevation": [
      {
        "elevation": 75.01666666666667,
        "airport_count": 28
      },
      {
        "elevation": 225.05,
        "airport_count": 53
      },
      {
        "elevation": 375.0833333333333,
        "airport_count": 106
      },
      {
        "elevation": 525.1166666666667,
        "airport_count": 147
      },
      {
        "elevation": 675.15,
        "airport_count": 67
      },
      {
        "elevation": 825.1833333333333,
        "airport_count": 46
      },
      {
        "elevation": 975.2166666666667,
        "airport_count": 51
      },
      {
        "elevation": 1125.25,
        "airport_count": 85
      },
      {
        "elevation": 1275.2833333333333,
        "airport_count": 80
      },
      {
        "elevation": 1425.3166666666666,
        "airport_count": 45
      },
      {
        "elevation": 1575.35,
        "airport_count": 27
      },
      {
        "elevation": 1725.3833333333332,
        "airport_count": 25
      },
      {
        "elevation": 1875.4166666666667,
        "airport_count": 11
      },
      {
        "elevation": 2025.45,
        "airport_count": 16
      },
      {
        "elevation": 2175.4833333333336,
        "airport_count": 5
      },
      {
        "elevation": 2325.516666666667,
        "airport_count": 8
      },
      {
        "elevation": 2475.55,
        "airport_count": 2
      },
      {
        "elevation": 2925.65,
        "airport_count": 1
      },
      {
        "elevation": 4576.016666666666,
        "airport_count": 1
      }
    ]
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    base."state" as "state__0",
    CASE WHEN group_set=0 THEN
      COUNT(1)
      END as "airport_count__0",
    ((CASE WHEN group_set=1 THEN
      max(base."elevation")
      END-CASE WHEN group_set=1 THEN
      min(base."elevation")
      END))*1.0/30 as "bin_size__1",
    CASE WHEN group_set=2 THEN
      base."elevation"
      END as "elevation__2",
    CASE WHEN group_set=2 THEN
      COUNT(1)
      END as "row_count__2"
  FROM '../data/airports.parquet' as base
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,2,1)) as group_set  ) as group_set
  GROUP BY 1,2,5
)
, __stage1 AS (
  SELECT 
    CASE WHEN group_set=2 THEN 1 ELSE group_set END as group_set,
    "state__0" as "state__0",
    FIRST("airport_count__0") FILTER (WHERE "airport_count__0" IS NOT NULL) as "airport_count__0",
    FIRST("bin_size__1") FILTER (WHERE "bin_size__1" IS NOT NULL) as "bin_size__1",
    COALESCE(LIST({
      "elevation": "elevation__2", 
      "row_count": "row_count__2"}  ORDER BY  "row_count__2" desc NULLS LAST) FILTER (WHERE group_set=2),[]) as "data__1"
  FROM __stage0
  GROUP BY 1,2
)
SELECT
  "state__0" as "state",
  MAX(CASE WHEN group_set=0 THEN "airport_count__0" END) as "airport_count",
  (WITH __stage0 AS (
    SELECT 
       ((FLOOR(data_0."elevation"*1.0/base."bin_size"))*(NULLIF(base."bin_size",0)))+(base."bin_size"*1.0/2) as "elevation",
       COALESCE(SUM(data_0."row_count"),0) as "airport_count"
    FROM (SELECT UNNEST([COALESCE(FIRST({"bin_size": "bin_size__1" , "data": "data__1" }) FILTER(WHERE group_set=1), {"bin_size": NULL, "data": NULL})]) as base) as base
    LEFT JOIN LATERAL (SELECT UNNEST(base."data"), 1 as ignoreme) as data_0_outer(data_0,ignoreme) ON data_0_outer.ignoreme=1
    GROUP BY 1
    ORDER BY 1 ASC NULLS LAST
  )
  SELECT LIST(STRUCT_PACK("elevation","airport_count") ORDER BY elevation asc) FROM __stage0
  ) as "by_elevation"
FROM __stage1
GROUP BY 1
ORDER BY 2 desc NULLS LAST