Malloy Documentation
search

Malloy allows you to compute sums and averages correctly based on your join tree. Fan-outs based on join relationships will never impact the correctness of these aggregations. This example has flights, joining to aircraft, joining to aircraft_model``. aircraft_model` has the number of seats specified on this model of aircraft. Code below computes sums and averages at various places in the join tree.

document
// join 3 tables, flights, aircraft and aircraft models.
// `flights` is individual flights
// `aircraft` is the plane that made the flight
// `aircraft_models` is data about the kind of aircraft

source: aircraft_models is duckdb.table('../data/aircraft_models.parquet') extend {
  primary_key: aircraft_model_code
}

source: aircraft is duckdb.table('../data/aircraft.parquet') extend {
  primary_key: tail_num
  join_one: aircraft_models with aircraft_model_code
}

source: flights is duckdb.table('../data/flights.parquet') extend {
  join_one: aircraft with tail_num
}

run: flights -> {
  where: dep_time = @2003-01
  group_by: carrier
  aggregate:
    // number of flights
    flight_count is count()
    // number of planes
    aircraft_count is aircraft.count()
    // number of different aircraft_models
    aircraft_model_count is aircraft.aircraft_models.count()
    // count each seat once for each flight.
    seats_for_sale is sum(aircraft.aircraft_models.seats)
    // count the seat once for each plane
    seats_on_all_planes is aircraft.sum(aircraft.aircraft_models.seats)
    // average number of seats on each model by model
    average_seats_per_model is aircraft.aircraft_models.seats.avg()
}
QUERY RESULTS
[
  {
    "carrier": "WN",
    "flight_count": 1209,
    "aircraft_count": 6,
    "aircraft_model_count": 2,
    "seats_for_sale": 175362,
    "seats_on_all_planes": 867,
    "average_seats_per_model": 144.5
  },
  {
    "carrier": "EV",
    "flight_count": 547,
    "aircraft_count": 3,
    "aircraft_model_count": 3,
    "seats_for_sale": 35896,
    "seats_on_all_planes": 184,
    "average_seats_per_model": 61.333333333333336
  },
  {
    "carrier": "UA",
    "flight_count": 524,
    "aircraft_count": 6,
    "aircraft_model_count": 5,
    "seats_for_sale": 107646,
    "seats_on_all_planes": 1357,
    "average_seats_per_model": 231.4
  },
  {
    "carrier": "AA",
    "flight_count": 422,
    "aircraft_count": 6,
    "aircraft_model_count": 4,
    "seats_for_sale": 73492,
    "seats_on_all_planes": 945,
    "average_seats_per_model": 150.25
  },
  {
    "carrier": "NW",
    "flight_count": 419,
    "aircraft_count": 5,
    "aircraft_model_count": 5,
    "seats_for_sale": 78528,
    "seats_on_all_planes": 914,
    "average_seats_per_model": 182.8
  }
]
SELECT 
   base."carrier" as "carrier",
   COUNT(1) as "flight_count",
   COUNT(DISTINCT aircraft_0."tail_num") as "aircraft_count",
   COUNT(DISTINCT aircraft_models_0."aircraft_model_code") as "aircraft_model_count",
   COALESCE(SUM(aircraft_models_0."seats"),0) as "seats_for_sale",
   COALESCE((
        SELECT SUM(a.val) as value
        FROM (
          SELECT UNNEST(list(distinct {key:aircraft_0."tail_num", val: aircraft_models_0."seats"})) a
        )
      ),0) as "seats_on_all_planes",
   (
        SELECT AVG(a.val) as value
        FROM (
          SELECT UNNEST(list(distinct {key:aircraft_models_0."aircraft_model_code", val: aircraft_models_0."seats"})) a
        )
      ) as "average_seats_per_model"
FROM '../data/flights.parquet' as base
 LEFT JOIN '../data/aircraft.parquet' AS aircraft_0
  ON aircraft_0."tail_num"=base."tail_num"
 LEFT JOIN '../data/aircraft_models.parquet' AS aircraft_models_0
  ON aircraft_models_0."aircraft_model_code"=aircraft_0."aircraft_model_code"
WHERE (base."dep_time">=TIMESTAMP '2003-01-01 00:00:00') and (base."dep_time"<TIMESTAMP '2003-02-01 00:00:00')
GROUP BY 1
ORDER BY 2 desc NULLS LAST