It is often useful to see how groups of people behave over time. The most simple example is in retail. Of the people that first signed up for accouunts how many made an order and how many ordered in subsequent months.
We'll use the following model
source: order_items is duckdb.table('../data/order_items.parquet') extend { join_one: users is duckdb.table('../data/users.parquet') on user_id=users.id measure: user_count is count(user_id) order_count is count() total_sales is sale_price.sum() }
Example
In the example below, we look at the first 6 months of 2022. In this example we are looking in terms of numbers of users.
run: order_items -> { // limit the data to 6 months where: (users.created_at.month ? @2022 for 6 months) and created_at ? @2022 for 6 months // show overall statistics about the orders group_by: `Order Month` is created_at.month aggregate: `Users that Ordered Count` is user_count // Pivot the cohorts # pivot nest: cohorts is { group_by: `User Signup Cohort` is users.created_at.month aggregate: `Users in Cohort that Ordered` is user_count # percent `Percent of cohort that ordered` is user_count/all(user_count) order_by: `User Signup Cohort` } }
[ { "Order Month": "2022-06-01T00:00:00.000Z", "Users that Ordered Count": 6079, "cohorts": [ { "User Signup Cohort": "2022-01-01T00:00:00.000Z", "Users in Cohort that Ordered": 391, "Percent of cohort that ordered": 0.06431978943905248 }, { "User Signup Cohort": "2022-02-01T00:00:00.000Z", "Users in Cohort that Ordered": 494, "Percent of cohort that ordered": 0.08126336568514558 }, { "User Signup Cohort": "2022-03-01T00:00:00.000Z", "Users in Cohort that Ordered": 658, "Percent of cohort that ordered": 0.10824148708669189 }, { "User Signup Cohort": "2022-04-01T00:00:00.000Z", "Users in Cohort that Ordered": 592, "Percent of cohort that ordered": 0.09738443822997203 }, { "User Signup Cohort": "2022-05-01T00:00:00.000Z", "Users in Cohort that Ordered": 426, "Percent of cohort that ordered": 0.07007731534791907 }, { "User Signup Cohort": "2022-06-01T00:00:00.000Z", "Users in Cohort that Ordered": 3518, "Percent of cohort that ordered": 0.578713604211219 } ] }, { "Order Month": "2022-05-01T00:00:00.000Z", "Users that Ordered Count": 5957, "cohorts": [ { "User Signup Cohort": "2022-01-01T00:00:00.000Z", "Users in Cohort that Ordered": 515, "Percent of cohort that ordered": 0.08645291253986906 }, { "User Signup Cohort": "2022-02-01T00:00:00.000Z", "Users in Cohort that Ordered": 631, "Percent of cohort that ordered": 0.10592580157797549 }, { "User Signup Cohort": "2022-03-01T00:00:00.000Z", "Users in Cohort that Ordered": 689, "Percent of cohort that ordered": 0.1156622460970287 }, { "User Signup Cohort": "2022-04-01T00:00:00.000Z", "Users in Cohort that Ordered": 438, "Percent of cohort that ordered": 0.07352694309216048 }, { "User Signup Cohort": "2022-05-01T00:00:00.000Z", "Users in Cohort that Ordered": 3684, "Percent of cohort that ordered": 0.6184320966929663 } ] }, { "Order Month": "2022-04-01T00:00:00.000Z", "Users that Ordered Count": 5145, "cohorts": [ { "User Signup Cohort": "2022-01-01T00:00:00.000Z", "Users in Cohort that Ordered": 526, "Percent of cohort that ordered": 0.1022351797862002 }, { "User Signup Cohort": "2022-02-01T00:00:00.000Z", "Users in Cohort that Ordered": 575, "Percent of cohort that ordered": 0.11175898931000972 }, { "User Signup Cohort": "2022-03-01T00:00:00.000Z", "Users in Cohort that Ordered": 413, "Percent of cohort that ordered": 0.08027210884353742 }, { "User Signup Cohort": "2022-04-01T00:00:00.000Z", "Users in Cohort that Ordered": 3631, "Percent of cohort that ordered": 0.7057337220602526 } ] }, { "Order Month": "2022-03-01T00:00:00.000Z", "Users that Ordered Count": 4628, "cohorts": [ { "User Signup Cohort": "2022-01-01T00:00:00.000Z", "Users in Cohort that Ordered": 563, "Percent of cohort that ordered": 0.12165082108902334 }, { "User Signup Cohort": "2022-02-01T00:00:00.000Z", "Users in Cohort that Ordered": 379, "Percent of cohort that ordered": 0.08189282627484874 }, { "User Signup Cohort": "2022-03-01T00:00:00.000Z", "Users in Cohort that Ordered": 3686, "Percent of cohort that ordered": 0.7964563526361279 } ] }, { "Order Month": "2022-02-01T00:00:00.000Z", "Users that Ordered Count": 3714, "cohorts": [ { "User Signup Cohort": "2022-01-01T00:00:00.000Z", "Users in Cohort that Ordered": 349, "Percent of cohort that ordered": 0.09396876682821756 }, { "User Signup Cohort": "2022-02-01T00:00:00.000Z", "Users in Cohort that Ordered": 3365, "Percent of cohort that ordered": 0.9060312331717825 } ] }, { "Order Month": "2022-01-01T00:00:00.000Z", "Users that Ordered Count": 3412, "cohorts": [ { "User Signup Cohort": "2022-01-01T00:00:00.000Z", "Users in Cohort that Ordered": 3412, "Percent of cohort that ordered": 1 } ] } ]
WITH __stage0 AS ( SELECT group_set, DATE_TRUNC('month', base."created_at") as "Order Month__0", (CASE WHEN group_set=0 THEN count(distinct base."user_id") END) as "Users that Ordered Count__0", CASE WHEN group_set=1 THEN DATE_TRUNC('month', users_0."created_at") END as "User Signup Cohort__1", (CASE WHEN group_set=1 THEN count(distinct base."user_id") END) as "Users in Cohort that Ordered__1", (CASE WHEN group_set=1 THEN count(distinct base."user_id") END)*1.0/MAX((CASE WHEN group_set=0 THEN count(distinct base."user_id") END)) OVER (PARTITION BY DATE_TRUNC('month', base."created_at")) as "Percent of cohort that ordered__1" FROM '../data/order_items.parquet' as base LEFT JOIN '../data/users.parquet' AS users_0 ON base."user_id"=users_0."id" CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set ) as group_set WHERE (((DATE_TRUNC('month', users_0."created_at")>=TIMESTAMP '2022-01-01 00:00:00') and (DATE_TRUNC('month', users_0."created_at")<(TIMESTAMP '2022-01-01 00:00:00' + INTERVAL (6) month)))) and ((base."created_at">=TIMESTAMP '2022-01-01 00:00:00') and (base."created_at"<(TIMESTAMP '2022-01-01 00:00:00' + INTERVAL (6) month))) GROUP BY 1,2,4 ) SELECT "Order Month__0" as "Order Month", MAX(CASE WHEN group_set=0 THEN "Users that Ordered Count__0" END) as "Users that Ordered Count", COALESCE(LIST({ "User Signup Cohort": "User Signup Cohort__1", "Users in Cohort that Ordered": "Users in Cohort that Ordered__1", "Percent of cohort that ordered": "Percent of cohort that ordered__1"} ORDER BY "User Signup Cohort__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[]) as "cohorts" FROM __stage0 GROUP BY 1 ORDER BY 1 desc NULLS LAST
Cohorts as a percentage of Sales.
run: order_items -> { where: (users.created_at.month ? @2022 for 6 months) and created_at ? @2022 for 6 months group_by: `Order Month` is created_at.month aggregate: # number="$#,##0" `Total Sales` is total_sales # pivot nest: cohorts is { group_by: `User Signup Cohort` is users.created_at.month aggregate: # percent `Cohort as Percent of Sales` is total_sales/all(total_sales) order_by: `User Signup Cohort` } }
[ { "Order Month": "2022-06-01T00:00:00.000Z", "Total Sales": 292787.5609920025, "cohorts": [ { "User Signup Cohort": "2022-01-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.06321364876554311 }, { "User Signup Cohort": "2022-02-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.07950959394744701 }, { "User Signup Cohort": "2022-03-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.10827970293407221 }, { "User Signup Cohort": "2022-04-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.09918249251192496 }, { "User Signup Cohort": "2022-05-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.06476091399702179 }, { "User Signup Cohort": "2022-06-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.5850536478439909 } ] }, { "Order Month": "2022-05-01T00:00:00.000Z", "Total Sales": 282180.4910290241, "cohorts": [ { "User Signup Cohort": "2022-01-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.07841654817825883 }, { "User Signup Cohort": "2022-02-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.1046733245788363 }, { "User Signup Cohort": "2022-03-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.11391935722575265 }, { "User Signup Cohort": "2022-04-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.07071711444572101 }, { "User Signup Cohort": "2022-05-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.6322736555714312 } ] }, { "Order Month": "2022-04-01T00:00:00.000Z", "Total Sales": 255442.98086452484, "cohorts": [ { "User Signup Cohort": "2022-01-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.10074205218437038 }, { "User Signup Cohort": "2022-02-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.1097800770439291 }, { "User Signup Cohort": "2022-03-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.07844819234968298 }, { "User Signup Cohort": "2022-04-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.7110296784220176 } ] }, { "Order Month": "2022-03-01T00:00:00.000Z", "Total Sales": 231150.29072272778, "cohorts": [ { "User Signup Cohort": "2022-01-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.11044550295494741 }, { "User Signup Cohort": "2022-02-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.08307941108991722 }, { "User Signup Cohort": "2022-03-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.8064750859551354 } ] }, { "Order Month": "2022-02-01T00:00:00.000Z", "Total Sales": 181266.44057393074, "cohorts": [ { "User Signup Cohort": "2022-01-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.09080489467517858 }, { "User Signup Cohort": "2022-02-01T00:00:00.000Z", "Cohort as Percent of Sales": 0.9091951053248214 } ] }, { "Order Month": "2022-01-01T00:00:00.000Z", "Total Sales": 171324.8705778122, "cohorts": [ { "User Signup Cohort": "2022-01-01T00:00:00.000Z", "Cohort as Percent of Sales": 1 } ] } ]
WITH __stage0 AS ( SELECT group_set, DATE_TRUNC('month', base."created_at") as "Order Month__0", (CASE WHEN group_set=0 THEN COALESCE(SUM(base."sale_price"),0) END) as "Total Sales__0", CASE WHEN group_set=1 THEN DATE_TRUNC('month', users_0."created_at") END as "User Signup Cohort__1", (CASE WHEN group_set=1 THEN COALESCE(SUM(base."sale_price"),0) END)*1.0/MAX((CASE WHEN group_set=0 THEN COALESCE(SUM(base."sale_price"),0) END)) OVER (PARTITION BY DATE_TRUNC('month', base."created_at")) as "Cohort as Percent of Sales__1" FROM '../data/order_items.parquet' as base LEFT JOIN '../data/users.parquet' AS users_0 ON base."user_id"=users_0."id" CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set ) as group_set WHERE (((DATE_TRUNC('month', users_0."created_at")>=TIMESTAMP '2022-01-01 00:00:00') and (DATE_TRUNC('month', users_0."created_at")<(TIMESTAMP '2022-01-01 00:00:00' + INTERVAL (6) month)))) and ((base."created_at">=TIMESTAMP '2022-01-01 00:00:00') and (base."created_at"<(TIMESTAMP '2022-01-01 00:00:00' + INTERVAL (6) month))) GROUP BY 1,2,4 ) SELECT "Order Month__0" as "Order Month", MAX(CASE WHEN group_set=0 THEN "Total Sales__0" END) as "Total Sales", COALESCE(LIST({ "User Signup Cohort": "User Signup Cohort__1", "Cohort as Percent of Sales": "Cohort as Percent of Sales__1"} ORDER BY "User Signup Cohort__1" ASC NULLS LAST) FILTER (WHERE group_set=1),[]) as "cohorts" FROM __stage0 GROUP BY 1 ORDER BY 1 desc NULLS LAST