Malloy Documentation
search

It is often useful to see how groups of people behave over time. The most simple example is in retail. Of the people that first signed up for accouunts how many made an order and how many ordered in subsequent months.

We'll use the following model

document
source: order_items is duckdb.table('../data/order_items.parquet') extend {
  join_one: users is duckdb.table('../data/users.parquet') on user_id=users.id
  measure: 
    user_count is count(user_id)
    order_count is count()
    total_sales is sale_price.sum()
}

Example

In the example below, we look at the first 6 months of 2022. In this example we are looking in terms of numbers of users.

document
run: order_items -> {
  // limit the data to 6 months
  where: (users.created_at.month ? @2022 for 6 months) and
    created_at ? @2022 for 6 months

  // show overall statistics about the orders
  group_by: `Order Month` is created_at.month
  aggregate: 
    `Users that Ordered Count` is user_count

  // Pivot the cohorts
  # pivot
  nest: cohorts is {
    group_by: `User Signup Cohort` is users.created_at.month
    aggregate: 
      `Users in Cohort that Ordered` is user_count
      # percent
      `Percent of cohort that ordered` is user_count/all(user_count)

    order_by: `User Signup Cohort`
  }
}
QUERY RESULTS
[
  {
    "Order Month": "2022-06-01T00:00:00.000Z",
    "Users that Ordered Count": 6079,
    "cohorts": [
      {
        "User Signup Cohort": "2022-01-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 391,
        "Percent of cohort that ordered": 0.06431978943905248
      },
      {
        "User Signup Cohort": "2022-02-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 494,
        "Percent of cohort that ordered": 0.08126336568514558
      },
      {
        "User Signup Cohort": "2022-03-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 658,
        "Percent of cohort that ordered": 0.10824148708669189
      },
      {
        "User Signup Cohort": "2022-04-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 592,
        "Percent of cohort that ordered": 0.09738443822997203
      },
      {
        "User Signup Cohort": "2022-05-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 426,
        "Percent of cohort that ordered": 0.07007731534791907
      },
      {
        "User Signup Cohort": "2022-06-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 3518,
        "Percent of cohort that ordered": 0.578713604211219
      }
    ]
  },
  {
    "Order Month": "2022-05-01T00:00:00.000Z",
    "Users that Ordered Count": 5957,
    "cohorts": [
      {
        "User Signup Cohort": "2022-01-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 515,
        "Percent of cohort that ordered": 0.08645291253986906
      },
      {
        "User Signup Cohort": "2022-02-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 631,
        "Percent of cohort that ordered": 0.10592580157797549
      },
      {
        "User Signup Cohort": "2022-03-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 689,
        "Percent of cohort that ordered": 0.1156622460970287
      },
      {
        "User Signup Cohort": "2022-04-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 438,
        "Percent of cohort that ordered": 0.07352694309216048
      },
      {
        "User Signup Cohort": "2022-05-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 3684,
        "Percent of cohort that ordered": 0.6184320966929663
      }
    ]
  },
  {
    "Order Month": "2022-04-01T00:00:00.000Z",
    "Users that Ordered Count": 5145,
    "cohorts": [
      {
        "User Signup Cohort": "2022-01-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 526,
        "Percent of cohort that ordered": 0.1022351797862002
      },
      {
        "User Signup Cohort": "2022-02-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 575,
        "Percent of cohort that ordered": 0.11175898931000972
      },
      {
        "User Signup Cohort": "2022-03-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 413,
        "Percent of cohort that ordered": 0.08027210884353742
      },
      {
        "User Signup Cohort": "2022-04-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 3631,
        "Percent of cohort that ordered": 0.7057337220602526
      }
    ]
  },
  {
    "Order Month": "2022-03-01T00:00:00.000Z",
    "Users that Ordered Count": 4628,
    "cohorts": [
      {
        "User Signup Cohort": "2022-01-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 563,
        "Percent of cohort that ordered": 0.12165082108902334
      },
      {
        "User Signup Cohort": "2022-02-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 379,
        "Percent of cohort that ordered": 0.08189282627484874
      },
      {
        "User Signup Cohort": "2022-03-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 3686,
        "Percent of cohort that ordered": 0.7964563526361279
      }
    ]
  },
  {
    "Order Month": "2022-02-01T00:00:00.000Z",
    "Users that Ordered Count": 3714,
    "cohorts": [
      {
        "User Signup Cohort": "2022-01-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 349,
        "Percent of cohort that ordered": 0.09396876682821756
      },
      {
        "User Signup Cohort": "2022-02-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 3365,
        "Percent of cohort that ordered": 0.9060312331717825
      }
    ]
  },
  {
    "Order Month": "2022-01-01T00:00:00.000Z",
    "Users that Ordered Count": 3412,
    "cohorts": [
      {
        "User Signup Cohort": "2022-01-01T00:00:00.000Z",
        "Users in Cohort that Ordered": 3412,
        "Percent of cohort that ordered": 1
      }
    ]
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    DATE_TRUNC('month', base."created_at") as "Order Month__0",
    (CASE WHEN group_set=0 THEN
      count(distinct base."user_id")
      END) as "Users that Ordered Count__0",
    CASE WHEN group_set=1 THEN
      DATE_TRUNC('month', users_0."created_at")
      END as "User Signup Cohort__1",
    (CASE WHEN group_set=1 THEN
      count(distinct base."user_id")
      END) as "Users in Cohort that Ordered__1",
    (CASE WHEN group_set=1 THEN
      count(distinct base."user_id")
      END)*1.0/MAX((CASE WHEN group_set=0 THEN
      count(distinct base."user_id")
      END)) OVER (PARTITION BY DATE_TRUNC('month', base."created_at")) as "Percent of cohort that ordered__1"
  FROM '../data/order_items.parquet' as base
   LEFT JOIN '../data/users.parquet' AS users_0
    ON base."user_id"=users_0."id"
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  WHERE (((DATE_TRUNC('month', users_0."created_at")>=TIMESTAMP '2022-01-01 00:00:00') and (DATE_TRUNC('month', users_0."created_at")<(TIMESTAMP '2022-01-01 00:00:00' + INTERVAL (6) month)))) and ((base."created_at">=TIMESTAMP '2022-01-01 00:00:00') and (base."created_at"<(TIMESTAMP '2022-01-01 00:00:00' + INTERVAL (6) month)))
  GROUP BY 1,2,4
)
SELECT
  "Order Month__0" as "Order Month",
  MAX(CASE WHEN group_set=0 THEN "Users that Ordered Count__0" END) as "Users that Ordered Count",
  COALESCE(LIST({
    "User Signup Cohort": "User Signup Cohort__1", 
    "Users in Cohort that Ordered": "Users in Cohort that Ordered__1", 
    "Percent of cohort that ordered": "Percent of cohort that ordered__1"}  ORDER BY  "User Signup Cohort__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[]) as "cohorts"
FROM __stage0
GROUP BY 1
ORDER BY 1 desc NULLS LAST

Cohorts as a percentage of Sales.

document
run: order_items -> {
  where: (users.created_at.month ? @2022 for 6 months) and
    created_at ? @2022 for 6 months
  group_by: `Order Month` is created_at.month
  aggregate: 
    # number="$#,##0"
    `Total Sales` is total_sales
  # pivot
  nest: cohorts is {
    group_by: `User Signup Cohort` is users.created_at.month
    aggregate: 
      # percent
      `Cohort as Percent of Sales` is total_sales/all(total_sales)

    order_by: `User Signup Cohort`
  }
}
QUERY RESULTS
[
  {
    "Order Month": "2022-06-01T00:00:00.000Z",
    "Total Sales": 292787.5609920025,
    "cohorts": [
      {
        "User Signup Cohort": "2022-01-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.06321364876554311
      },
      {
        "User Signup Cohort": "2022-02-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.07950959394744701
      },
      {
        "User Signup Cohort": "2022-03-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.10827970293407221
      },
      {
        "User Signup Cohort": "2022-04-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.09918249251192496
      },
      {
        "User Signup Cohort": "2022-05-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.06476091399702179
      },
      {
        "User Signup Cohort": "2022-06-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.5850536478439909
      }
    ]
  },
  {
    "Order Month": "2022-05-01T00:00:00.000Z",
    "Total Sales": 282180.4910290241,
    "cohorts": [
      {
        "User Signup Cohort": "2022-01-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.07841654817825883
      },
      {
        "User Signup Cohort": "2022-02-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.1046733245788363
      },
      {
        "User Signup Cohort": "2022-03-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.11391935722575265
      },
      {
        "User Signup Cohort": "2022-04-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.07071711444572101
      },
      {
        "User Signup Cohort": "2022-05-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.6322736555714312
      }
    ]
  },
  {
    "Order Month": "2022-04-01T00:00:00.000Z",
    "Total Sales": 255442.98086452484,
    "cohorts": [
      {
        "User Signup Cohort": "2022-01-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.10074205218437038
      },
      {
        "User Signup Cohort": "2022-02-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.1097800770439291
      },
      {
        "User Signup Cohort": "2022-03-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.07844819234968298
      },
      {
        "User Signup Cohort": "2022-04-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.7110296784220176
      }
    ]
  },
  {
    "Order Month": "2022-03-01T00:00:00.000Z",
    "Total Sales": 231150.29072272778,
    "cohorts": [
      {
        "User Signup Cohort": "2022-01-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.11044550295494741
      },
      {
        "User Signup Cohort": "2022-02-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.08307941108991722
      },
      {
        "User Signup Cohort": "2022-03-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.8064750859551354
      }
    ]
  },
  {
    "Order Month": "2022-02-01T00:00:00.000Z",
    "Total Sales": 181266.44057393074,
    "cohorts": [
      {
        "User Signup Cohort": "2022-01-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.09080489467517858
      },
      {
        "User Signup Cohort": "2022-02-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 0.9091951053248214
      }
    ]
  },
  {
    "Order Month": "2022-01-01T00:00:00.000Z",
    "Total Sales": 171324.8705778122,
    "cohorts": [
      {
        "User Signup Cohort": "2022-01-01T00:00:00.000Z",
        "Cohort as Percent of Sales": 1
      }
    ]
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    DATE_TRUNC('month', base."created_at") as "Order Month__0",
    (CASE WHEN group_set=0 THEN
      COALESCE(SUM(base."sale_price"),0)
      END) as "Total Sales__0",
    CASE WHEN group_set=1 THEN
      DATE_TRUNC('month', users_0."created_at")
      END as "User Signup Cohort__1",
    (CASE WHEN group_set=1 THEN
      COALESCE(SUM(base."sale_price"),0)
      END)*1.0/MAX((CASE WHEN group_set=0 THEN
      COALESCE(SUM(base."sale_price"),0)
      END)) OVER (PARTITION BY DATE_TRUNC('month', base."created_at")) as "Cohort as Percent of Sales__1"
  FROM '../data/order_items.parquet' as base
   LEFT JOIN '../data/users.parquet' AS users_0
    ON base."user_id"=users_0."id"
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  WHERE (((DATE_TRUNC('month', users_0."created_at")>=TIMESTAMP '2022-01-01 00:00:00') and (DATE_TRUNC('month', users_0."created_at")<(TIMESTAMP '2022-01-01 00:00:00' + INTERVAL (6) month)))) and ((base."created_at">=TIMESTAMP '2022-01-01 00:00:00') and (base."created_at"<(TIMESTAMP '2022-01-01 00:00:00' + INTERVAL (6) month)))
  GROUP BY 1,2,4
)
SELECT
  "Order Month__0" as "Order Month",
  MAX(CASE WHEN group_set=0 THEN "Total Sales__0" END) as "Total Sales",
  COALESCE(LIST({
    "User Signup Cohort": "User Signup Cohort__1", 
    "Cohort as Percent of Sales": "Cohort as Percent of Sales__1"}  ORDER BY  "User Signup Cohort__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[]) as "cohorts"
FROM __stage0
GROUP BY 1
ORDER BY 1 desc NULLS LAST