Malloy has a special query operator that builds _Dimensional Search Indexes for sources. A Dimensional Search Index is a table with 4 columns. Dimensional indexes are useful for a variety of things including filtering suggestions and LLMs.
When filtering data, you might know a term, but not necessarily which column in the one of the join data contains it. Indexing the data on field names and high cardinality fields let's you qucikly find the term and the associated value.
Indexing could be used by LLMs to find the interesting column/term mapping in the data set.
fieldName - The path to the column in the source
fieldValue - The dimensional value for the field (or range if fieldType is not a string)
fieldType - Type type of the column.
weight - a weighting to use as to the importance the distinct dimensional value. Defaults to cardinality of the field.
Simple Example
We're going to take the airports table and index it. The results are an un ordered list of distinct fieldName/fieldValue pairs appear in the table. The weight, in this case is the number of rows that partciular occurs on.
run: duckdb.table('../data/airports.parquet') -> { index: * }
[ { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "09/1987", "weight": 54 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "08/1997", "weight": 22 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "03/2000", "weight": 61 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "02/1988", "weight": 45 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "01/1988", "weight": 34 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "08/1999", "weight": 31 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "07/1992", "weight": 13 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "06/1992", "weight": 22 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "02/1999", "weight": 82 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "03/1990", "weight": 72 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "11/1989", "weight": 108 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "11/1988", "weight": 113 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "10/1991", "weight": 36 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "05/1987", "weight": 48 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "10/1994", "weight": 47 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "08/1988", "weight": 115 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "11/1982", "weight": 42 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "08/1991", "weight": 58 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "06/1991", "weight": 102 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "07/1990", "weight": 80 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "07/1993", "weight": 15 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "08/1983", "weight": 19 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "03/1982", "weight": 29 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "10/1996", "weight": 26 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "06/1984", "weight": 65 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "12/1993", "weight": 22 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "01/1992", "weight": 68 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "04/1986", "weight": 65 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "08/1987", "weight": 62 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "10/2000", "weight": 9 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "09/1992", "weight": 11 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": "09/2000", "weight": 8 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "ANCHORAGE", "weight": 181 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "FAIRBANKS", "weight": 63 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "KETCHIKAN", "weight": 28 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "POINT BARROW", "weight": 20 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "SEWARD", "weight": 8 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "ST LOUIS", "weight": 847 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "GNC 20", "weight": 4 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "PHOENIX", "weight": 308 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "EL PASO", "weight": 54 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "SAN FRANCISCO", "weight": 427 }, { "fieldName": "cbd_dir", "fieldPath": "cbd_dir", "fieldType": "string", "fieldValue": "SW", "weight": 2427 }, { "fieldName": "cbd_dir", "fieldPath": "cbd_dir", "fieldType": "string", "fieldValue": "S", "weight": 2155 }, { "fieldName": "cbd_dir", "fieldPath": "cbd_dir", "fieldType": "string", "fieldValue": "WNW", "weight": 11 }, { "fieldName": "cbd_dist", "fieldPath": "cbd_dist", "fieldType": "number", "fieldValue": "0 to 73", "weight": 19793 }, { "fieldName": "cert", "fieldPath": "cert", "fieldType": "string", "fieldValue": "AS 05/1973", "weight": 94 }, { "fieldName": "cert", "fieldPath": "cert", "fieldType": "string", "fieldValue": "LU 03/1978", "weight": 1 }, { "fieldName": "cert", "fieldPath": "cert", "fieldType": "string", "fieldValue": "AS 04/1985", "weight": 1 }, { "fieldName": "cert", "fieldPath": "cert", "fieldType": "string", "fieldValue": "AU 06/1974", "weight": 2 }, { "fieldName": "cert", "fieldPath": "cert", "fieldType": "string", "fieldValue": "LU 12/1977", "weight": 2 }, { "fieldName": "cert", "fieldPath": "cert", "fieldType": "string", "fieldValue": "AS 05/1974", "weight": 1 }, { "fieldName": "cert", "fieldPath": "cert", "fieldType": "string", "fieldValue": "AS 02/1977", "weight": 1 }, { "fieldName": "cert", "fieldPath": "cert", "fieldType": "string", "fieldValue": "ES 03/1973", "weight": 1 }, { "fieldName": "cert", "fieldPath": "cert", "fieldType": "string", "fieldValue": "LU 04/1976", "weight": 1 }, { "fieldName": "cert", "fieldPath": "cert", "fieldType": "string", "fieldValue": "BS 10/1998", "weight": 1 }, { "fieldName": "cert", "fieldPath": "cert", "fieldType": "string", "fieldValue": "LU 05/1973", "weight": 37 }, { "fieldName": "cert", "fieldPath": "cert", "fieldType": "string", "fieldValue": "AU 02/1981", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "MILI ISLAND", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "WOTJE ATOLL", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "PALMYRA ISLAND ATOLL", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "WENO ISLAND", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "YAP ISLAND", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "BIRCHWOOD", "weight": 2 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "KASILOF", "weight": 2 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "GRANITE MOUNTAIN", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "STERLING", "weight": 12 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "WILLOW", "weight": 9 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "TALKEETNA", "weight": 4 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "JUNEAU", "weight": 6 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "MOUNTAIN VILLAGE", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "KOKHANOK", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "COUNCIL", "weight": 4 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "TOK", "weight": 4 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "PILOT POINT", "weight": 2 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "NIGHTMUTE", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "BIRCH CREEK", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "KALAKAKET CREEK", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "KOYUK", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "BEAR CREEK", "weight": 2 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "FAREWELL", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "QUINHAGAK", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "FAREWELL LAKE", "weight": 2 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "TANANA", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "TAZLINA", "weight": 2 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "GRAYLING", "weight": 4 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "WHITTIER", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "SQUAW HARBOR", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "BARANOF", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "CLEAR", "weight": 2 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "LAKE LOUISE", "weight": 2 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "BARROW", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "NANCY LAKE", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "CHATHAM", "weight": 5 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "ELFIN COVE", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "SLEETMUTE", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "SHAGELUK", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "MIDDLETON ISLAND", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "HANNUM CREEK", "weight": 1 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "THORNE BAY", "weight": 1 } ]
WITH __stage0 AS ( SELECT group_set, CASE group_set WHEN 0 THEN 'act_date' WHEN 1 THEN 'aero_cht' WHEN 2 THEN 'c_ldg_rts' WHEN 3 THEN 'cbd_dir' WHEN 4 THEN 'cbd_dist' WHEN 5 THEN 'cert' WHEN 6 THEN 'city' WHEN 7 THEN 'cntl_twr' WHEN 8 THEN 'code' WHEN 9 THEN 'county' WHEN 10 THEN 'cust_intl' WHEN 11 THEN 'elevation' WHEN 12 THEN 'faa_dist' WHEN 13 THEN 'faa_region' WHEN 14 THEN 'fac_type' WHEN 15 THEN 'fac_use' WHEN 16 THEN 'fed_agree' WHEN 17 THEN 'full_name' WHEN 18 THEN 'id' WHEN 19 THEN 'joint_use' WHEN 20 THEN 'latitude' WHEN 21 THEN 'longitude' WHEN 22 THEN 'major' WHEN 23 THEN 'mil_rts' WHEN 24 THEN 'own_type' WHEN 25 THEN 'site_number' WHEN 26 THEN 'state' END as "fieldName", CASE group_set WHEN 0 THEN 'act_date' WHEN 1 THEN 'aero_cht' WHEN 2 THEN 'c_ldg_rts' WHEN 3 THEN 'cbd_dir' WHEN 4 THEN 'cbd_dist' WHEN 5 THEN 'cert' WHEN 6 THEN 'city' WHEN 7 THEN 'cntl_twr' WHEN 8 THEN 'code' WHEN 9 THEN 'county' WHEN 10 THEN 'cust_intl' WHEN 11 THEN 'elevation' WHEN 12 THEN 'faa_dist' WHEN 13 THEN 'faa_region' WHEN 14 THEN 'fac_type' WHEN 15 THEN 'fac_use' WHEN 16 THEN 'fed_agree' WHEN 17 THEN 'full_name' WHEN 18 THEN 'id' WHEN 19 THEN 'joint_use' WHEN 20 THEN 'latitude' WHEN 21 THEN 'longitude' WHEN 22 THEN 'major' WHEN 23 THEN 'mil_rts' WHEN 24 THEN 'own_type' WHEN 25 THEN 'site_number' WHEN 26 THEN 'state' END as "fieldPath", CASE group_set WHEN 0 THEN 'string' WHEN 1 THEN 'string' WHEN 2 THEN 'string' WHEN 3 THEN 'string' WHEN 4 THEN 'number' WHEN 5 THEN 'string' WHEN 6 THEN 'string' WHEN 7 THEN 'string' WHEN 8 THEN 'string' WHEN 9 THEN 'string' WHEN 10 THEN 'string' WHEN 11 THEN 'number' WHEN 12 THEN 'string' WHEN 13 THEN 'string' WHEN 14 THEN 'string' WHEN 15 THEN 'string' WHEN 16 THEN 'string' WHEN 17 THEN 'string' WHEN 18 THEN 'number' WHEN 19 THEN 'string' WHEN 20 THEN 'number' WHEN 21 THEN 'number' WHEN 22 THEN 'string' WHEN 23 THEN 'string' WHEN 24 THEN 'string' WHEN 25 THEN 'string' WHEN 26 THEN 'string' END as "fieldType", CASE group_set WHEN 99999 THEN CAST(NULL as VARCHAR) WHEN 0 THEN base."act_date" WHEN 1 THEN base."aero_cht" WHEN 2 THEN base."c_ldg_rts" WHEN 3 THEN base."cbd_dir" WHEN 5 THEN base."cert" WHEN 6 THEN base."city" WHEN 7 THEN base."cntl_twr" WHEN 8 THEN base."code" WHEN 9 THEN base."county" WHEN 10 THEN base."cust_intl" WHEN 12 THEN base."faa_dist" WHEN 13 THEN base."faa_region" WHEN 14 THEN base."fac_type" WHEN 15 THEN base."fac_use" WHEN 16 THEN base."fed_agree" WHEN 17 THEN base."full_name" WHEN 19 THEN base."joint_use" WHEN 22 THEN base."major" WHEN 23 THEN base."mil_rts" WHEN 24 THEN base."own_type" WHEN 25 THEN base."site_number" WHEN 26 THEN base."state" END as "fieldValue", COUNT(*) as "weight", CASE group_set WHEN 99999 THEN '' WHEN 4 THEN MIN(CAST(base."cbd_dist" as VARCHAR)) || ' to ' || CAST(MAX(base."cbd_dist") as VARCHAR) WHEN 11 THEN MIN(CAST(base."elevation" as VARCHAR)) || ' to ' || CAST(MAX(base."elevation") as VARCHAR) WHEN 18 THEN MIN(CAST(base."id" as VARCHAR)) || ' to ' || CAST(MAX(base."id") as VARCHAR) WHEN 20 THEN MIN(CAST(base."latitude" as VARCHAR)) || ' to ' || CAST(MAX(base."latitude") as VARCHAR) WHEN 21 THEN MIN(CAST(base."longitude" as VARCHAR)) || ' to ' || CAST(MAX(base."longitude") as VARCHAR) END as "fieldRange" FROM '../data/airports.parquet' as base CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,27,1)) as group_set ) as group_set GROUP BY 1,2,3,4,5 ) SELECT "fieldName", "fieldPath", "fieldType", COALESCE("fieldValue", "fieldRange") as "fieldValue", "weight" FROM __stage0
Add Ordering
Adding a second query stage to filter on string columns and ordering by weight descending shows us the most common fieldName/fieldValue pairs in the dataset.
All Malloy queries run as a single SQL query. The index:
operator is no different. Click the SQL tab to see how this works.
run: duckdb.table('../data/airports.parquet') -> { index: * } -> { where: fieldType = 'string' select: * order_by: weight desc }
[ { "fieldName": "major", "fieldPath": "major", "fieldType": "string", "fieldValue": "N", "weight": 19523 }, { "fieldName": "cert", "fieldPath": "cert", "fieldType": "string", "fieldValue": null, "weight": 19142 }, { "fieldName": "cntl_twr", "fieldPath": "cntl_twr", "fieldType": "string", "fieldValue": "N", "weight": 19124 }, { "fieldName": "fed_agree", "fieldPath": "fed_agree", "fieldType": "string", "fieldValue": null, "weight": 16252 }, { "fieldName": "c_ldg_rts", "fieldPath": "c_ldg_rts", "fieldType": "string", "fieldValue": null, "weight": 15145 }, { "fieldName": "cust_intl", "fieldPath": "cust_intl", "fieldType": "string", "fieldValue": null, "weight": 15145 }, { "fieldName": "joint_use", "fieldPath": "joint_use", "fieldType": "string", "fieldValue": null, "weight": 14804 }, { "fieldName": "mil_rts", "fieldPath": "mil_rts", "fieldType": "string", "fieldValue": null, "weight": 14716 }, { "fieldName": "fac_use", "fieldPath": "fac_use", "fieldType": "string", "fieldValue": "PR", "weight": 14428 }, { "fieldName": "own_type", "fieldPath": "own_type", "fieldType": "string", "fieldValue": "PR", "weight": 14306 }, { "fieldName": "fac_type", "fieldPath": "fac_type", "fieldType": "string", "fieldValue": "AIRPORT", "weight": 13925 }, { "fieldName": "act_date", "fieldPath": "act_date", "fieldType": "string", "fieldValue": null, "weight": 12040 }, { "fieldName": "faa_dist", "fieldPath": "faa_dist", "fieldType": "string", "fieldValue": "NONE", "weight": 7085 }, { "fieldName": "fac_use", "fieldPath": "fac_use", "fieldType": "string", "fieldValue": "PU", "weight": 5365 }, { "fieldName": "own_type", "fieldPath": "own_type", "fieldType": "string", "fieldValue": "PU", "weight": 5174 }, { "fieldName": "fac_type", "fieldPath": "fac_type", "fieldType": "string", "fieldValue": "HELIPORT", "weight": 5135 }, { "fieldName": "joint_use", "fieldPath": "joint_use", "fieldType": "string", "fieldValue": "N", "weight": 4779 }, { "fieldName": "cust_intl", "fieldPath": "cust_intl", "fieldType": "string", "fieldValue": "N", "weight": 4575 }, { "fieldName": "faa_region", "fieldPath": "faa_region", "fieldType": "string", "fieldValue": "AGL", "weight": 4437 }, { "fieldName": "c_ldg_rts", "fieldPath": "c_ldg_rts", "fieldType": "string", "fieldValue": "N", "weight": 4365 }, { "fieldName": "cbd_dir", "fieldPath": "cbd_dir", "fieldType": "string", "fieldValue": "N", "weight": 3694 }, { "fieldName": "faa_region", "fieldPath": "faa_region", "fieldType": "string", "fieldValue": "ASW", "weight": 3268 }, { "fieldName": "mil_rts", "fieldPath": "mil_rts", "fieldType": "string", "fieldValue": "Y", "weight": 2958 }, { "fieldName": "faa_region", "fieldPath": "faa_region", "fieldType": "string", "fieldValue": "ASO", "weight": 2924 }, { "fieldName": "faa_region", "fieldPath": "faa_region", "fieldType": "string", "fieldValue": "AEA", "weight": 2586 }, { "fieldName": "cbd_dir", "fieldPath": "cbd_dir", "fieldType": "string", "fieldValue": "NW", "weight": 2438 }, { "fieldName": "cbd_dir", "fieldPath": "cbd_dir", "fieldType": "string", "fieldValue": "SW", "weight": 2427 }, { "fieldName": "cbd_dir", "fieldPath": "cbd_dir", "fieldType": "string", "fieldValue": "NE", "weight": 2393 }, { "fieldName": "cbd_dir", "fieldPath": "cbd_dir", "fieldType": "string", "fieldValue": "SE", "weight": 2281 }, { "fieldName": "cbd_dir", "fieldPath": "cbd_dir", "fieldType": "string", "fieldValue": "S", "weight": 2155 }, { "fieldName": "cbd_dir", "fieldPath": "cbd_dir", "fieldType": "string", "fieldValue": "E", "weight": 2127 }, { "fieldName": "mil_rts", "fieldPath": "mil_rts", "fieldType": "string", "fieldValue": "N", "weight": 2119 }, { "fieldName": "faa_region", "fieldPath": "faa_region", "fieldType": "string", "fieldValue": "ANM", "weight": 2102 }, { "fieldName": "cbd_dir", "fieldPath": "cbd_dir", "fieldType": "string", "fieldValue": "W", "weight": 2065 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "TX", "weight": 1845 }, { "fieldName": "fed_agree", "fieldPath": "fed_agree", "fieldType": "string", "fieldValue": "NGY", "weight": 1682 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "NEW YORK", "weight": 1581 }, { "fieldName": "faa_region", "fieldPath": "faa_region", "fieldType": "string", "fieldValue": "ACE", "weight": 1579 }, { "fieldName": "faa_dist", "fieldPath": "faa_dist", "fieldType": "string", "fieldValue": "CHI", "weight": 1528 }, { "fieldName": "faa_region", "fieldPath": "faa_region", "fieldType": "string", "fieldValue": "AWP", "weight": 1503 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "CHICAGO", "weight": 1480 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "DETROIT", "weight": 1262 }, { "fieldName": "faa_dist", "fieldPath": "faa_dist", "fieldType": "string", "fieldValue": "DET", "weight": 1240 }, { "fieldName": "faa_dist", "fieldPath": "faa_dist", "fieldType": "string", "fieldValue": "SEA", "weight": 1162 }, { "fieldName": "faa_dist", "fieldPath": "faa_dist", "fieldType": "string", "fieldValue": "MSP", "weight": 1046 }, { "fieldName": "faa_dist", "fieldPath": "faa_dist", "fieldType": "string", "fieldValue": "ATL", "weight": 1029 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "CA", "weight": 984 }, { "fieldName": "faa_dist", "fieldPath": "faa_dist", "fieldType": "string", "fieldValue": "NYC", "weight": 957 }, { "fieldName": "faa_dist", "fieldPath": "faa_dist", "fieldType": "string", "fieldValue": "ORL", "weight": 905 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "IL", "weight": 890 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "DALLAS-FT WORTH", "weight": 888 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "WASHINGTON", "weight": 887 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "FL", "weight": 856 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "ST LOUIS", "weight": 847 }, { "fieldName": "faa_dist", "fieldPath": "faa_dist", "fieldType": "string", "fieldValue": "HAR", "weight": 846 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "PA", "weight": 804 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "ATLANTA", "weight": 780 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "HOUSTON", "weight": 765 }, { "fieldName": "faa_region", "fieldPath": "faa_region", "fieldType": "string", "fieldValue": "ANE", "weight": 763 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "SEATTLE", "weight": 749 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "OH", "weight": 749 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "KANSAS CITY", "weight": 713 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "TWIN CITIES", "weight": 699 }, { "fieldName": "faa_dist", "fieldPath": "faa_dist", "fieldType": "string", "fieldValue": "DEN", "weight": 679 }, { "fieldName": "cntl_twr", "fieldPath": "cntl_twr", "fieldType": "string", "fieldValue": "Y", "weight": 669 }, { "fieldName": "faa_dist", "fieldPath": "faa_dist", "fieldType": "string", "fieldValue": "DCA", "weight": 667 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "IN", "weight": 643 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "CINCINNATI", "weight": 624 }, { "fieldName": "faa_dist", "fieldPath": "faa_dist", "fieldType": "string", "fieldValue": "BIS", "weight": 616 }, { "fieldName": "faa_region", "fieldPath": "faa_region", "fieldType": "string", "fieldValue": "AAL", "weight": 608 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "AK", "weight": 608 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "MEMPHIS", "weight": 605 }, { "fieldName": "faa_dist", "fieldPath": "faa_dist", "fieldType": "string", "fieldValue": "SFO", "weight": 585 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "NY", "weight": 576 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "OMAHA", "weight": 551 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "WI", "weight": 543 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "MO", "weight": 537 }, { "fieldName": "fed_agree", "fieldPath": "fed_agree", "fieldType": "string", "fieldValue": "N", "weight": 515 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "MN", "weight": 507 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "SAN ANTONIO", "weight": 503 }, { "fieldName": "faa_dist", "fieldPath": "faa_dist", "fieldType": "string", "fieldValue": "JAN", "weight": 502 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "LA", "weight": 500 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "LOS ANGELES", "weight": 498 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "MI", "weight": 489 }, { "fieldName": "faa_dist", "fieldPath": "faa_dist", "fieldType": "string", "fieldValue": "MEM", "weight": 487 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "WA", "weight": 484 }, { "fieldName": "fac_type", "fieldPath": "fac_type", "fieldType": "string", "fieldValue": "SEAPLANE BASE", "weight": 473 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "JACKSONVILLE", "weight": 467 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "NEW ORLEANS", "weight": 444 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "OK", "weight": 443 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "OR", "weight": 441 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "GA", "weight": 440 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "ND", "weight": 436 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "SAN FRANCISCO", "weight": 427 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "CO", "weight": 425 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "VA", "weight": 421 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "GREEN BAY", "weight": 418 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "KS", "weight": 415 }, { "fieldName": "state", "fieldPath": "state", "fieldType": "string", "fieldValue": "NC", "weight": 400 }, { "fieldName": "aero_cht", "fieldPath": "aero_cht", "fieldType": "string", "fieldValue": "CHARLOTTE", "weight": 380 } ]
WITH __stage0 AS ( SELECT group_set, CASE group_set WHEN 0 THEN 'act_date' WHEN 1 THEN 'aero_cht' WHEN 2 THEN 'c_ldg_rts' WHEN 3 THEN 'cbd_dir' WHEN 4 THEN 'cbd_dist' WHEN 5 THEN 'cert' WHEN 6 THEN 'city' WHEN 7 THEN 'cntl_twr' WHEN 8 THEN 'code' WHEN 9 THEN 'county' WHEN 10 THEN 'cust_intl' WHEN 11 THEN 'elevation' WHEN 12 THEN 'faa_dist' WHEN 13 THEN 'faa_region' WHEN 14 THEN 'fac_type' WHEN 15 THEN 'fac_use' WHEN 16 THEN 'fed_agree' WHEN 17 THEN 'full_name' WHEN 18 THEN 'id' WHEN 19 THEN 'joint_use' WHEN 20 THEN 'latitude' WHEN 21 THEN 'longitude' WHEN 22 THEN 'major' WHEN 23 THEN 'mil_rts' WHEN 24 THEN 'own_type' WHEN 25 THEN 'site_number' WHEN 26 THEN 'state' END as "fieldName", CASE group_set WHEN 0 THEN 'act_date' WHEN 1 THEN 'aero_cht' WHEN 2 THEN 'c_ldg_rts' WHEN 3 THEN 'cbd_dir' WHEN 4 THEN 'cbd_dist' WHEN 5 THEN 'cert' WHEN 6 THEN 'city' WHEN 7 THEN 'cntl_twr' WHEN 8 THEN 'code' WHEN 9 THEN 'county' WHEN 10 THEN 'cust_intl' WHEN 11 THEN 'elevation' WHEN 12 THEN 'faa_dist' WHEN 13 THEN 'faa_region' WHEN 14 THEN 'fac_type' WHEN 15 THEN 'fac_use' WHEN 16 THEN 'fed_agree' WHEN 17 THEN 'full_name' WHEN 18 THEN 'id' WHEN 19 THEN 'joint_use' WHEN 20 THEN 'latitude' WHEN 21 THEN 'longitude' WHEN 22 THEN 'major' WHEN 23 THEN 'mil_rts' WHEN 24 THEN 'own_type' WHEN 25 THEN 'site_number' WHEN 26 THEN 'state' END as "fieldPath", CASE group_set WHEN 0 THEN 'string' WHEN 1 THEN 'string' WHEN 2 THEN 'string' WHEN 3 THEN 'string' WHEN 4 THEN 'number' WHEN 5 THEN 'string' WHEN 6 THEN 'string' WHEN 7 THEN 'string' WHEN 8 THEN 'string' WHEN 9 THEN 'string' WHEN 10 THEN 'string' WHEN 11 THEN 'number' WHEN 12 THEN 'string' WHEN 13 THEN 'string' WHEN 14 THEN 'string' WHEN 15 THEN 'string' WHEN 16 THEN 'string' WHEN 17 THEN 'string' WHEN 18 THEN 'number' WHEN 19 THEN 'string' WHEN 20 THEN 'number' WHEN 21 THEN 'number' WHEN 22 THEN 'string' WHEN 23 THEN 'string' WHEN 24 THEN 'string' WHEN 25 THEN 'string' WHEN 26 THEN 'string' END as "fieldType", CASE group_set WHEN 99999 THEN CAST(NULL as VARCHAR) WHEN 0 THEN base."act_date" WHEN 1 THEN base."aero_cht" WHEN 2 THEN base."c_ldg_rts" WHEN 3 THEN base."cbd_dir" WHEN 5 THEN base."cert" WHEN 6 THEN base."city" WHEN 7 THEN base."cntl_twr" WHEN 8 THEN base."code" WHEN 9 THEN base."county" WHEN 10 THEN base."cust_intl" WHEN 12 THEN base."faa_dist" WHEN 13 THEN base."faa_region" WHEN 14 THEN base."fac_type" WHEN 15 THEN base."fac_use" WHEN 16 THEN base."fed_agree" WHEN 17 THEN base."full_name" WHEN 19 THEN base."joint_use" WHEN 22 THEN base."major" WHEN 23 THEN base."mil_rts" WHEN 24 THEN base."own_type" WHEN 25 THEN base."site_number" WHEN 26 THEN base."state" END as "fieldValue", COUNT(*) as "weight", CASE group_set WHEN 99999 THEN '' WHEN 4 THEN MIN(CAST(base."cbd_dist" as VARCHAR)) || ' to ' || CAST(MAX(base."cbd_dist") as VARCHAR) WHEN 11 THEN MIN(CAST(base."elevation" as VARCHAR)) || ' to ' || CAST(MAX(base."elevation") as VARCHAR) WHEN 18 THEN MIN(CAST(base."id" as VARCHAR)) || ' to ' || CAST(MAX(base."id") as VARCHAR) WHEN 20 THEN MIN(CAST(base."latitude" as VARCHAR)) || ' to ' || CAST(MAX(base."latitude") as VARCHAR) WHEN 21 THEN MIN(CAST(base."longitude" as VARCHAR)) || ' to ' || CAST(MAX(base."longitude") as VARCHAR) END as "fieldRange" FROM '../data/airports.parquet' as base CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,27,1)) as group_set ) as group_set GROUP BY 1,2,3,4,5 ) , __stage1 AS ( SELECT "fieldName", "fieldPath", "fieldType", COALESCE("fieldValue", "fieldRange") as "fieldValue", "weight" FROM __stage0 ) SELECT base."fieldName" as "fieldName", base."fieldPath" as "fieldPath", base."fieldType" as "fieldType", base."fieldValue" as "fieldValue", base."weight" as "weight" FROM __stage1 as base WHERE base."fieldType"='string' ORDER BY 5 desc NULLS LAST
Index For Filtering User Interfaces
Indexes can be used find the best way to filter a dataset. For example supposed we'd like to find 'SANTA CRUZ' in the dataset. Upon approaching the dataset, but we don't which column might contain it. In a UI you might imagine that you type 'SANTA' and let have suggestons for values that might be appropriate. In the results we can see that top value, 'SANTA ROSA', appears as county on 26 rows in the table. We can also see that 'SANTA CRUZ' is both a city
and a county
..
run: duckdb.table('../data/airports.parquet') -> { index: * } -> { where: fieldValue ~ r'SANTA' select: * order_by: weight desc limit: 15 }
[ { "fieldName": "county", "fieldPath": "county", "fieldType": "string", "fieldValue": "SANTA ROSA", "weight": 26 }, { "fieldName": "county", "fieldPath": "county", "fieldType": "string", "fieldValue": "SANTA BARBARA", "weight": 22 }, { "fieldName": "county", "fieldPath": "county", "fieldType": "string", "fieldValue": "SANTA CLARA", "weight": 10 }, { "fieldName": "county", "fieldPath": "county", "fieldType": "string", "fieldValue": "SANTA CRUZ", "weight": 10 }, { "fieldName": "county", "fieldPath": "county", "fieldType": "string", "fieldValue": "SANTA FE", "weight": 9 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "SANTA ANA", "weight": 6 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "SANTA FE", "weight": 6 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "SANTA BARBARA", "weight": 5 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "SANTA ROSA", "weight": 4 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "SANTA MARIA", "weight": 3 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "SANTA YNEZ", "weight": 3 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "SANTA ELENA", "weight": 2 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "SANTA CRUZ", "weight": 2 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "SANTA YSABEL", "weight": 2 }, { "fieldName": "city", "fieldPath": "city", "fieldType": "string", "fieldValue": "RANCHO SANTA MARGARITA", "weight": 2 } ]
WITH __stage0 AS ( SELECT group_set, CASE group_set WHEN 0 THEN 'act_date' WHEN 1 THEN 'aero_cht' WHEN 2 THEN 'c_ldg_rts' WHEN 3 THEN 'cbd_dir' WHEN 4 THEN 'cbd_dist' WHEN 5 THEN 'cert' WHEN 6 THEN 'city' WHEN 7 THEN 'cntl_twr' WHEN 8 THEN 'code' WHEN 9 THEN 'county' WHEN 10 THEN 'cust_intl' WHEN 11 THEN 'elevation' WHEN 12 THEN 'faa_dist' WHEN 13 THEN 'faa_region' WHEN 14 THEN 'fac_type' WHEN 15 THEN 'fac_use' WHEN 16 THEN 'fed_agree' WHEN 17 THEN 'full_name' WHEN 18 THEN 'id' WHEN 19 THEN 'joint_use' WHEN 20 THEN 'latitude' WHEN 21 THEN 'longitude' WHEN 22 THEN 'major' WHEN 23 THEN 'mil_rts' WHEN 24 THEN 'own_type' WHEN 25 THEN 'site_number' WHEN 26 THEN 'state' END as "fieldName", CASE group_set WHEN 0 THEN 'act_date' WHEN 1 THEN 'aero_cht' WHEN 2 THEN 'c_ldg_rts' WHEN 3 THEN 'cbd_dir' WHEN 4 THEN 'cbd_dist' WHEN 5 THEN 'cert' WHEN 6 THEN 'city' WHEN 7 THEN 'cntl_twr' WHEN 8 THEN 'code' WHEN 9 THEN 'county' WHEN 10 THEN 'cust_intl' WHEN 11 THEN 'elevation' WHEN 12 THEN 'faa_dist' WHEN 13 THEN 'faa_region' WHEN 14 THEN 'fac_type' WHEN 15 THEN 'fac_use' WHEN 16 THEN 'fed_agree' WHEN 17 THEN 'full_name' WHEN 18 THEN 'id' WHEN 19 THEN 'joint_use' WHEN 20 THEN 'latitude' WHEN 21 THEN 'longitude' WHEN 22 THEN 'major' WHEN 23 THEN 'mil_rts' WHEN 24 THEN 'own_type' WHEN 25 THEN 'site_number' WHEN 26 THEN 'state' END as "fieldPath", CASE group_set WHEN 0 THEN 'string' WHEN 1 THEN 'string' WHEN 2 THEN 'string' WHEN 3 THEN 'string' WHEN 4 THEN 'number' WHEN 5 THEN 'string' WHEN 6 THEN 'string' WHEN 7 THEN 'string' WHEN 8 THEN 'string' WHEN 9 THEN 'string' WHEN 10 THEN 'string' WHEN 11 THEN 'number' WHEN 12 THEN 'string' WHEN 13 THEN 'string' WHEN 14 THEN 'string' WHEN 15 THEN 'string' WHEN 16 THEN 'string' WHEN 17 THEN 'string' WHEN 18 THEN 'number' WHEN 19 THEN 'string' WHEN 20 THEN 'number' WHEN 21 THEN 'number' WHEN 22 THEN 'string' WHEN 23 THEN 'string' WHEN 24 THEN 'string' WHEN 25 THEN 'string' WHEN 26 THEN 'string' END as "fieldType", CASE group_set WHEN 99999 THEN CAST(NULL as VARCHAR) WHEN 0 THEN base."act_date" WHEN 1 THEN base."aero_cht" WHEN 2 THEN base."c_ldg_rts" WHEN 3 THEN base."cbd_dir" WHEN 5 THEN base."cert" WHEN 6 THEN base."city" WHEN 7 THEN base."cntl_twr" WHEN 8 THEN base."code" WHEN 9 THEN base."county" WHEN 10 THEN base."cust_intl" WHEN 12 THEN base."faa_dist" WHEN 13 THEN base."faa_region" WHEN 14 THEN base."fac_type" WHEN 15 THEN base."fac_use" WHEN 16 THEN base."fed_agree" WHEN 17 THEN base."full_name" WHEN 19 THEN base."joint_use" WHEN 22 THEN base."major" WHEN 23 THEN base."mil_rts" WHEN 24 THEN base."own_type" WHEN 25 THEN base."site_number" WHEN 26 THEN base."state" END as "fieldValue", COUNT(*) as "weight", CASE group_set WHEN 99999 THEN '' WHEN 4 THEN MIN(CAST(base."cbd_dist" as VARCHAR)) || ' to ' || CAST(MAX(base."cbd_dist") as VARCHAR) WHEN 11 THEN MIN(CAST(base."elevation" as VARCHAR)) || ' to ' || CAST(MAX(base."elevation") as VARCHAR) WHEN 18 THEN MIN(CAST(base."id" as VARCHAR)) || ' to ' || CAST(MAX(base."id") as VARCHAR) WHEN 20 THEN MIN(CAST(base."latitude" as VARCHAR)) || ' to ' || CAST(MAX(base."latitude") as VARCHAR) WHEN 21 THEN MIN(CAST(base."longitude" as VARCHAR)) || ' to ' || CAST(MAX(base."longitude") as VARCHAR) END as "fieldRange" FROM '../data/airports.parquet' as base CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,27,1)) as group_set ) as group_set GROUP BY 1,2,3,4,5 ) , __stage1 AS ( SELECT "fieldName", "fieldPath", "fieldType", COALESCE("fieldValue", "fieldRange") as "fieldValue", "weight" FROM __stage0 ) SELECT base."fieldName" as "fieldName", base."fieldPath" as "fieldPath", base."fieldType" as "fieldType", base."fieldValue" as "fieldValue", base."weight" as "weight" FROM __stage1 as base WHERE REGEXP_MATCHES(base."fieldValue",'SANTA') ORDER BY 5 desc NULLS LAST LIMIT 15
We can then write a simple query to show the rows. It turns out that 'SANTA CRUZ' is a county in both California and Arizona.
run: duckdb.table('../data/airports.parquet') -> { where: county ~ 'SANTA CRUZ' select: * }
[ { "act_date": "10/1987", "aero_cht": "PHOENIX", "c_ldg_rts": null, "cbd_dir": "E", "cbd_dist": 1, "cert": null, "city": "TUBAC", "cntl_twr": "N", "code": "2AZ8", "county": "SANTA CRUZ", "cust_intl": null, "elevation": 3200, "faa_dist": "NONE", "faa_region": "AWP", "fac_type": "ULTRALIGHT", "fac_use": "PR", "fed_agree": null, "full_name": "TUBAC ULTRALIGHT FLIGHTPARK", "id": 1444, "joint_use": null, "latitude": 31.61, "longitude": -111.03, "major": "N", "mil_rts": null, "own_type": "PR", "site_number": "00811.*U", "state": "AZ" }, { "act_date": null, "aero_cht": "PHOENIX", "c_ldg_rts": "N", "cbd_dir": "NE", "cbd_dist": 7, "cert": null, "city": "NOGALES", "cntl_twr": "N", "code": "OLS", "county": "SANTA CRUZ", "cust_intl": "Y", "elevation": 3955, "faa_dist": "NONE", "faa_region": "AWP", "fac_type": "AIRPORT", "fac_use": "PU", "fed_agree": "NGY", "full_name": "NOGALES INTL", "id": 1312, "joint_use": "N", "latitude": 31.41, "longitude": -110.84, "major": "N", "mil_rts": "Y", "own_type": "PU", "site_number": "00739.*A", "state": "AZ" }, { "act_date": null, "aero_cht": "LOS ANGELES", "c_ldg_rts": null, "cbd_dir": null, "cbd_dist": 0, "cert": null, "city": "SAN NICOLAS ISLAND", "cntl_twr": "Y", "code": "NSI", "county": "SANTA CRUZ", "cust_intl": null, "elevation": 504, "faa_dist": "NONE", "faa_region": "AWP", "fac_type": "AIRPORT", "fac_use": "PR", "fed_agree": null, "full_name": "SAN NICOLAS ISLAND NOLF", "id": 2287, "joint_use": null, "latitude": 33.23, "longitude": -119.45, "major": "N", "mil_rts": "Y", "own_type": "MN", "site_number": "02220.*A", "state": "CA" }, { "act_date": "07/1986", "aero_cht": "SAN FRANCISCO", "c_ldg_rts": null, "cbd_dir": "NW", "cbd_dist": 8, "cert": null, "city": "SANTA CRUZ", "cntl_twr": "N", "code": "CL77", "county": "SANTA CRUZ", "cust_intl": null, "elevation": 2020, "faa_dist": "SFO", "faa_region": "AWP", "fac_type": "AIRPORT", "fac_use": "PR", "fed_agree": null, "full_name": "BONNY DOON VILLAGE", "id": 2307, "joint_use": null, "latitude": 37.07, "longitude": -122.12, "major": "N", "mil_rts": null, "own_type": "PR", "site_number": "02241.1*A", "state": "CA" }, { "act_date": null, "aero_cht": "SAN FRANCISCO", "c_ldg_rts": null, "cbd_dir": "NE", "cbd_dist": 3, "cert": null, "city": "SANTA CRUZ", "cntl_twr": "N", "code": "CA37", "county": "SANTA CRUZ", "cust_intl": null, "elevation": 115, "faa_dist": "SFO", "faa_region": "AWP", "fac_type": "HELIPORT", "fac_use": "PR", "fed_agree": null, "full_name": "DOMINICAN SANTA CRUZ HOSPITAL", "id": 2308, "joint_use": null, "latitude": 36.99, "longitude": -121.98, "major": "N", "mil_rts": null, "own_type": "PR", "site_number": "02241.12*H", "state": "CA" }, { "act_date": null, "aero_cht": "SAN FRANCISCO", "c_ldg_rts": null, "cbd_dir": "NW", "cbd_dist": 6, "cert": null, "city": "DAVENPORT", "cntl_twr": "N", "code": "6Q6", "county": "SANTA CRUZ", "cust_intl": null, "elevation": 125, "faa_dist": "SFO", "faa_region": "AWP", "fac_type": "AIRPORT", "fac_use": "PR", "fed_agree": null, "full_name": "LAS TRANCAS", "id": 1693, "joint_use": "N", "latitude": 37.08, "longitude": -122.27, "major": "N", "mil_rts": "N", "own_type": "PR", "site_number": "01486.9*A", "state": "CA" }, { "act_date": null, "aero_cht": "SAN FRANCISCO", "c_ldg_rts": null, "cbd_dir": "NW", "cbd_dist": 5, "cert": null, "city": "WATSONVILLE", "cntl_twr": "N", "code": "CA65", "county": "SANTA CRUZ", "cust_intl": null, "elevation": 480, "faa_dist": "SFO", "faa_region": "AWP", "fac_type": "HELIPORT", "fac_use": "PR", "fed_agree": null, "full_name": "ALTA VISTA", "id": 2441, "joint_use": "N", "latitude": 36.97, "longitude": -121.86, "major": "N", "mil_rts": "N", "own_type": "PR", "site_number": "02429.1*H", "state": "CA" }, { "act_date": "09/1987", "aero_cht": "SAN FRANCISCO", "c_ldg_rts": null, "cbd_dir": "E", "cbd_dist": 0, "cert": null, "city": "WATSONVILLE", "cntl_twr": "N", "code": "CL99", "county": "SANTA CRUZ", "cust_intl": null, "elevation": 111, "faa_dist": "SFO", "faa_region": "AWP", "fac_type": "HELIPORT", "fac_use": "PR", "fed_agree": null, "full_name": "WATSONVILLE COMMUNITY HOSPITAL", "id": 2442, "joint_use": null, "latitude": 36.93, "longitude": -121.77, "major": "N", "mil_rts": null, "own_type": "PU", "site_number": "02429.11*H", "state": "CA" }, { "act_date": null, "aero_cht": "SAN FRANCISCO", "c_ldg_rts": null, "cbd_dir": "W", "cbd_dist": 4, "cert": null, "city": "WATSONVILLE", "cntl_twr": "N", "code": "CA66", "county": "SANTA CRUZ", "cust_intl": null, "elevation": 70, "faa_dist": "SFO", "faa_region": "AWP", "fac_type": "AIRPORT", "fac_use": "PR", "fed_agree": null, "full_name": "MONTEREY BAY ACADEMY", "id": 2443, "joint_use": "N", "latitude": 36.9, "longitude": -121.84, "major": "N", "mil_rts": "N", "own_type": "PR", "site_number": "02429.2*A", "state": "CA" }, { "act_date": null, "aero_cht": "SAN FRANCISCO", "c_ldg_rts": "N", "cbd_dir": "NW", "cbd_dist": 3, "cert": null, "city": "WATSONVILLE", "cntl_twr": "N", "code": "WVI", "county": "SANTA CRUZ", "cust_intl": "N", "elevation": 160, "faa_dist": "SFO", "faa_region": "AWP", "fac_type": "AIRPORT", "fac_use": "PU", "fed_agree": "NGPRY", "full_name": "WATSONVILLE MUNI", "id": 2440, "joint_use": "N", "latitude": 36.93, "longitude": -121.78, "major": "N", "mil_rts": "Y", "own_type": "PU", "site_number": "02429.*A", "state": "CA" } ]
SELECT base."act_date" as "act_date", base."aero_cht" as "aero_cht", base."c_ldg_rts" as "c_ldg_rts", base."cbd_dir" as "cbd_dir", base."cbd_dist" as "cbd_dist", base."cert" as "cert", base."city" as "city", base."cntl_twr" as "cntl_twr", base."code" as "code", base."county" as "county", base."cust_intl" as "cust_intl", base."elevation" as "elevation", base."faa_dist" as "faa_dist", base."faa_region" as "faa_region", base."fac_type" as "fac_type", base."fac_use" as "fac_use", base."fed_agree" as "fed_agree", base."full_name" as "full_name", base."id" as "id", base."joint_use" as "joint_use", base."latitude" as "latitude", base."longitude" as "longitude", base."major" as "major", base."mil_rts" as "mil_rts", base."own_type" as "own_type", base."site_number" as "site_number", base."state" as "state" FROM '../data/airports.parquet' as base WHERE base."county" LIKE 'SANTA CRUZ'
Indexing to show top values for each dimension
It is often difficult to approach a new dataset. The index operator provides an intersting way to quickly gain an understanding of the dataset. By piping the results of an index another stage, we can quickly see all the interesting values for each of the interesting dimesions. Again, the weight shows the number of rows for that particular dimension/value.
run: duckdb.table('../data/airports.parquet') -> { index: * } -> { group_by: fieldName nest: values is { group_by: fieldValue, weight order_by: weight desc limit: 10 } order_by: fieldName }
[ { "fieldName": "act_date", "values": [ { "fieldValue": null, "weight": 12040 }, { "fieldValue": "04/1993", "weight": 145 }, { "fieldValue": "02/1987", "weight": 129 }, { "fieldValue": "03/1993", "weight": 127 }, { "fieldValue": "11/1990", "weight": 122 }, { "fieldValue": "11/1986", "weight": 119 }, { "fieldValue": "02/1982", "weight": 118 }, { "fieldValue": "08/1988", "weight": 115 }, { "fieldValue": "11/1988", "weight": 113 }, { "fieldValue": "03/1988", "weight": 113 } ] }, { "fieldName": "aero_cht", "values": [ { "fieldValue": "NEW YORK", "weight": 1581 }, { "fieldValue": "CHICAGO", "weight": 1480 }, { "fieldValue": "DETROIT", "weight": 1262 }, { "fieldValue": "DALLAS-FT WORTH", "weight": 888 }, { "fieldValue": "WASHINGTON", "weight": 887 }, { "fieldValue": "ST LOUIS", "weight": 847 }, { "fieldValue": "ATLANTA", "weight": 780 }, { "fieldValue": "HOUSTON", "weight": 765 }, { "fieldValue": "SEATTLE", "weight": 749 }, { "fieldValue": "KANSAS CITY", "weight": 713 } ] }, { "fieldName": "c_ldg_rts", "values": [ { "fieldValue": null, "weight": 15145 }, { "fieldValue": "N", "weight": 4365 }, { "fieldValue": "Y", "weight": 283 } ] }, { "fieldName": "cbd_dir", "values": [ { "fieldValue": "N", "weight": 3694 }, { "fieldValue": "NW", "weight": 2438 }, { "fieldValue": "SW", "weight": 2427 }, { "fieldValue": "NE", "weight": 2393 }, { "fieldValue": "SE", "weight": 2281 }, { "fieldValue": "S", "weight": 2155 }, { "fieldValue": "E", "weight": 2127 }, { "fieldValue": "W", "weight": 2065 }, { "fieldValue": null, "weight": 119 }, { "fieldValue": "SSW", "weight": 20 } ] }, { "fieldName": "cbd_dist", "values": [ { "fieldValue": "0 to 73", "weight": 19793 } ] }, { "fieldName": "cert", "values": [ { "fieldValue": null, "weight": 19142 }, { "fieldValue": "AS 05/1973", "weight": 94 }, { "fieldValue": "CS 05/1973", "weight": 84 }, { "fieldValue": "BS 05/1973", "weight": 81 }, { "fieldValue": "LU 05/1973", "weight": 37 }, { "fieldValue": "LU 08/1990", "weight": 33 }, { "fieldValue": "DS 05/1973", "weight": 26 }, { "fieldValue": "AU 05/1973", "weight": 25 }, { "fieldValue": "ES 05/1973", "weight": 15 }, { "fieldValue": "LU 11/1974", "weight": 9 } ] }, { "fieldName": "city", "values": [ { "fieldValue": "HOUSTON", "weight": 108 }, { "fieldValue": "LOS ANGELES", "weight": 60 }, { "fieldValue": "COLUMBUS", "weight": 47 }, { "fieldValue": "SPRINGFIELD", "weight": 45 }, { "fieldValue": "JACKSON", "weight": 42 }, { "fieldValue": "GREENVILLE", "weight": 38 }, { "fieldValue": "WASHINGTON", "weight": 38 }, { "fieldValue": "CLINTON", "weight": 37 }, { "fieldValue": "PHOENIX", "weight": 37 }, { "fieldValue": "PHILADELPHIA", "weight": 35 } ] }, { "fieldName": "cntl_twr", "values": [ { "fieldValue": "N", "weight": 19124 }, { "fieldValue": "Y", "weight": 669 } ] }, { "fieldName": "code", "values": [ { "fieldValue": "1Q9", "weight": 1 }, { "fieldValue": "AK41", "weight": 1 }, { "fieldValue": "1AK6", "weight": 1 }, { "fieldValue": "GSZ", "weight": 1 }, { "fieldValue": "Z70", "weight": 1 }, { "fieldValue": "5BK", "weight": 1 }, { "fieldValue": "AK94", "weight": 1 }, { "fieldValue": "9K2", "weight": 1 }, { "fieldValue": "IKO", "weight": 1 }, { "fieldValue": "3AK1", "weight": 1 } ] }, { "fieldName": "county", "values": [ { "fieldValue": "WASHINGTON", "weight": 214 }, { "fieldValue": "JEFFERSON", "weight": 199 }, { "fieldValue": "LOS ANGELES", "weight": 176 }, { "fieldValue": "MONTGOMERY", "weight": 154 }, { "fieldValue": "JACKSON", "weight": 140 }, { "fieldValue": "FRANKLIN", "weight": 140 }, { "fieldValue": "HARRIS", "weight": 137 }, { "fieldValue": "MARION", "weight": 127 }, { "fieldValue": "ORANGE", "weight": 125 }, { "fieldValue": "MARICOPA", "weight": 117 } ] }, { "fieldName": "cust_intl", "values": [ { "fieldValue": null, "weight": 15145 }, { "fieldValue": "N", "weight": 4575 }, { "fieldValue": "Y", "weight": 73 } ] }, { "fieldName": "elevation", "values": [ { "fieldValue": "-1 to 12442", "weight": 19793 } ] }, { "fieldName": "faa_dist", "values": [ { "fieldValue": "NONE", "weight": 7085 }, { "fieldValue": "CHI", "weight": 1528 }, { "fieldValue": "DET", "weight": 1240 }, { "fieldValue": "SEA", "weight": 1162 }, { "fieldValue": "MSP", "weight": 1046 }, { "fieldValue": "ATL", "weight": 1029 }, { "fieldValue": "NYC", "weight": 957 }, { "fieldValue": "ORL", "weight": 905 }, { "fieldValue": "HAR", "weight": 846 }, { "fieldValue": "DEN", "weight": 679 } ] }, { "fieldName": "faa_region", "values": [ { "fieldValue": "AGL", "weight": 4437 }, { "fieldValue": "ASW", "weight": 3268 }, { "fieldValue": "ASO", "weight": 2924 }, { "fieldValue": "AEA", "weight": 2586 }, { "fieldValue": "ANM", "weight": 2102 }, { "fieldValue": "ACE", "weight": 1579 }, { "fieldValue": "AWP", "weight": 1503 }, { "fieldValue": "ANE", "weight": 763 }, { "fieldValue": "AAL", "weight": 608 }, { "fieldValue": null, "weight": 23 } ] }, { "fieldName": "fac_type", "values": [ { "fieldValue": "AIRPORT", "weight": 13925 }, { "fieldValue": "HELIPORT", "weight": 5135 }, { "fieldValue": "SEAPLANE BASE", "weight": 473 }, { "fieldValue": "ULTRALIGHT", "weight": 125 }, { "fieldValue": "STOLPORT", "weight": 86 }, { "fieldValue": "GLIDERPORT", "weight": 37 }, { "fieldValue": "BALLOONPORT", "weight": 12 } ] }, { "fieldName": "fac_use", "values": [ { "fieldValue": "PR", "weight": 14428 }, { "fieldValue": "PU", "weight": 5365 } ] }, { "fieldName": "fed_agree", "values": [ { "fieldValue": null, "weight": 16252 }, { "fieldValue": "NGY", "weight": 1682 }, { "fieldValue": "N", "weight": 515 }, { "fieldValue": "NGY3", "weight": 219 }, { "fieldValue": "NGPY", "weight": 171 }, { "fieldValue": "NY1", "weight": 123 }, { "fieldValue": "N1", "weight": 97 }, { "fieldValue": "1", "weight": 96 }, { "fieldValue": "NGSY", "weight": 87 }, { "fieldValue": "NGPY3", "weight": 84 } ] }, { "fieldName": "full_name", "values": [ { "fieldValue": "MEMORIAL HOSPITAL", "weight": 21 }, { "fieldValue": "SMITH", "weight": 14 }, { "fieldValue": "JOHNSON", "weight": 13 }, { "fieldValue": "ST MARY'S HOSPITAL", "weight": 11 }, { "fieldValue": "MILLER", "weight": 10 }, { "fieldValue": "DAVIS FIELD", "weight": 10 }, { "fieldValue": "HILLTOP", "weight": 10 }, { "fieldValue": "TAYLOR", "weight": 9 }, { "fieldValue": "WILSON", "weight": 9 }, { "fieldValue": "FLYING W", "weight": 9 } ] }, { "fieldName": "id", "values": [ { "fieldValue": "1 to 19793", "weight": 19793 } ] }, { "fieldName": "joint_use", "values": [ { "fieldValue": null, "weight": 14804 }, { "fieldValue": "N", "weight": 4779 }, { "fieldValue": "Y", "weight": 210 } ] }, { "fieldName": "latitude", "values": [ { "fieldValue": "-14.18 to 71.28", "weight": 19793 } ] }, { "fieldName": "longitude", "values": [ { "fieldValue": "-100.0 to 174.11", "weight": 19793 } ] }, { "fieldName": "major", "values": [ { "fieldValue": "N", "weight": 19523 }, { "fieldValue": "Y", "weight": 270 } ] }, { "fieldName": "mil_rts", "values": [ { "fieldValue": null, "weight": 14716 }, { "fieldValue": "Y", "weight": 2958 }, { "fieldValue": "N", "weight": 2119 } ] }, { "fieldName": "own_type", "values": [ { "fieldValue": "PR", "weight": 14306 }, { "fieldValue": "PU", "weight": 5174 }, { "fieldValue": "MR", "weight": 128 }, { "fieldValue": "MA", "weight": 107 }, { "fieldValue": "MN", "weight": 78 } ] }, { "fieldName": "site_number", "values": [ { "fieldValue": "51542.*A", "weight": 1 }, { "fieldValue": "51550.01*A", "weight": 1 }, { "fieldValue": "50412.1*C", "weight": 1 }, { "fieldValue": "50406.1*A", "weight": 1 }, { "fieldValue": "50280.2*A", "weight": 1 }, { "fieldValue": "50146.012*A", "weight": 1 }, { "fieldValue": "50870.31*A", "weight": 1 }, { "fieldValue": "50681.11*A", "weight": 1 }, { "fieldValue": "50040.5*A", "weight": 1 }, { "fieldValue": "50215.01*A", "weight": 1 } ] }, { "fieldName": "state", "values": [ { "fieldValue": "TX", "weight": 1845 }, { "fieldValue": "CA", "weight": 984 }, { "fieldValue": "IL", "weight": 890 }, { "fieldValue": "FL", "weight": 856 }, { "fieldValue": "PA", "weight": 804 }, { "fieldValue": "OH", "weight": 749 }, { "fieldValue": "IN", "weight": 643 }, { "fieldValue": "AK", "weight": 608 }, { "fieldValue": "NY", "weight": 576 }, { "fieldValue": "WI", "weight": 543 } ] }, { "fieldName": null, "values": [ { "fieldValue": null, "weight": 19793 } ] } ]
WITH __stage0 AS ( SELECT group_set, CASE group_set WHEN 0 THEN 'act_date' WHEN 1 THEN 'aero_cht' WHEN 2 THEN 'c_ldg_rts' WHEN 3 THEN 'cbd_dir' WHEN 4 THEN 'cbd_dist' WHEN 5 THEN 'cert' WHEN 6 THEN 'city' WHEN 7 THEN 'cntl_twr' WHEN 8 THEN 'code' WHEN 9 THEN 'county' WHEN 10 THEN 'cust_intl' WHEN 11 THEN 'elevation' WHEN 12 THEN 'faa_dist' WHEN 13 THEN 'faa_region' WHEN 14 THEN 'fac_type' WHEN 15 THEN 'fac_use' WHEN 16 THEN 'fed_agree' WHEN 17 THEN 'full_name' WHEN 18 THEN 'id' WHEN 19 THEN 'joint_use' WHEN 20 THEN 'latitude' WHEN 21 THEN 'longitude' WHEN 22 THEN 'major' WHEN 23 THEN 'mil_rts' WHEN 24 THEN 'own_type' WHEN 25 THEN 'site_number' WHEN 26 THEN 'state' END as "fieldName", CASE group_set WHEN 0 THEN 'act_date' WHEN 1 THEN 'aero_cht' WHEN 2 THEN 'c_ldg_rts' WHEN 3 THEN 'cbd_dir' WHEN 4 THEN 'cbd_dist' WHEN 5 THEN 'cert' WHEN 6 THEN 'city' WHEN 7 THEN 'cntl_twr' WHEN 8 THEN 'code' WHEN 9 THEN 'county' WHEN 10 THEN 'cust_intl' WHEN 11 THEN 'elevation' WHEN 12 THEN 'faa_dist' WHEN 13 THEN 'faa_region' WHEN 14 THEN 'fac_type' WHEN 15 THEN 'fac_use' WHEN 16 THEN 'fed_agree' WHEN 17 THEN 'full_name' WHEN 18 THEN 'id' WHEN 19 THEN 'joint_use' WHEN 20 THEN 'latitude' WHEN 21 THEN 'longitude' WHEN 22 THEN 'major' WHEN 23 THEN 'mil_rts' WHEN 24 THEN 'own_type' WHEN 25 THEN 'site_number' WHEN 26 THEN 'state' END as "fieldPath", CASE group_set WHEN 0 THEN 'string' WHEN 1 THEN 'string' WHEN 2 THEN 'string' WHEN 3 THEN 'string' WHEN 4 THEN 'number' WHEN 5 THEN 'string' WHEN 6 THEN 'string' WHEN 7 THEN 'string' WHEN 8 THEN 'string' WHEN 9 THEN 'string' WHEN 10 THEN 'string' WHEN 11 THEN 'number' WHEN 12 THEN 'string' WHEN 13 THEN 'string' WHEN 14 THEN 'string' WHEN 15 THEN 'string' WHEN 16 THEN 'string' WHEN 17 THEN 'string' WHEN 18 THEN 'number' WHEN 19 THEN 'string' WHEN 20 THEN 'number' WHEN 21 THEN 'number' WHEN 22 THEN 'string' WHEN 23 THEN 'string' WHEN 24 THEN 'string' WHEN 25 THEN 'string' WHEN 26 THEN 'string' END as "fieldType", CASE group_set WHEN 99999 THEN CAST(NULL as VARCHAR) WHEN 0 THEN base."act_date" WHEN 1 THEN base."aero_cht" WHEN 2 THEN base."c_ldg_rts" WHEN 3 THEN base."cbd_dir" WHEN 5 THEN base."cert" WHEN 6 THEN base."city" WHEN 7 THEN base."cntl_twr" WHEN 8 THEN base."code" WHEN 9 THEN base."county" WHEN 10 THEN base."cust_intl" WHEN 12 THEN base."faa_dist" WHEN 13 THEN base."faa_region" WHEN 14 THEN base."fac_type" WHEN 15 THEN base."fac_use" WHEN 16 THEN base."fed_agree" WHEN 17 THEN base."full_name" WHEN 19 THEN base."joint_use" WHEN 22 THEN base."major" WHEN 23 THEN base."mil_rts" WHEN 24 THEN base."own_type" WHEN 25 THEN base."site_number" WHEN 26 THEN base."state" END as "fieldValue", COUNT(*) as "weight", CASE group_set WHEN 99999 THEN '' WHEN 4 THEN MIN(CAST(base."cbd_dist" as VARCHAR)) || ' to ' || CAST(MAX(base."cbd_dist") as VARCHAR) WHEN 11 THEN MIN(CAST(base."elevation" as VARCHAR)) || ' to ' || CAST(MAX(base."elevation") as VARCHAR) WHEN 18 THEN MIN(CAST(base."id" as VARCHAR)) || ' to ' || CAST(MAX(base."id") as VARCHAR) WHEN 20 THEN MIN(CAST(base."latitude" as VARCHAR)) || ' to ' || CAST(MAX(base."latitude") as VARCHAR) WHEN 21 THEN MIN(CAST(base."longitude" as VARCHAR)) || ' to ' || CAST(MAX(base."longitude") as VARCHAR) END as "fieldRange" FROM '../data/airports.parquet' as base CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,27,1)) as group_set ) as group_set GROUP BY 1,2,3,4,5 ) , __stage1 AS ( SELECT "fieldName", "fieldPath", "fieldType", COALESCE("fieldValue", "fieldRange") as "fieldValue", "weight" FROM __stage0 ) , __stage2 AS ( SELECT group_set, base."fieldName" as "fieldName__0", CASE WHEN group_set=1 THEN base."fieldValue" END as "fieldValue__1", CASE WHEN group_set=1 THEN base."weight" END as "weight__1" FROM __stage1 as base CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set ) as group_set GROUP BY 1,2,3,4 ) SELECT "fieldName__0" as "fieldName", COALESCE(LIST({ "fieldValue": "fieldValue__1", "weight": "weight__1"} ORDER BY "weight__1" desc NULLS LAST) FILTER (WHERE group_set=1)[1:10],[]) as "values" FROM __stage2 GROUP BY 1 ORDER BY 1 ASC NULLS LAST
Sampling
With large datasets, you can also sample a small subsection using the sample:
parameter. Sampled indexes are great at identifing the important low cardinality fields.
run: duckdb.table('../data/airports.parquet') -> { index: * sample: 5000 // sample only 5000 rows } -> { group_by: fieldName nest: values is { group_by: fieldValue, weight order_by: weight desc limit: 10 } order_by: fieldName }
[ { "fieldName": "act_date", "values": [ { "fieldValue": null, "weight": 3052 }, { "fieldValue": "03/1993", "weight": 38 }, { "fieldValue": "02/1982", "weight": 37 }, { "fieldValue": "02/1987", "weight": 33 }, { "fieldValue": "02/1998", "weight": 33 }, { "fieldValue": "11/1986", "weight": 32 }, { "fieldValue": "11/1988", "weight": 31 }, { "fieldValue": "11/1990", "weight": 29 }, { "fieldValue": "04/1982", "weight": 29 }, { "fieldValue": "04/1993", "weight": 29 } ] }, { "fieldName": "aero_cht", "values": [ { "fieldValue": "NEW YORK", "weight": 408 }, { "fieldValue": "CHICAGO", "weight": 370 }, { "fieldValue": "DETROIT", "weight": 308 }, { "fieldValue": "DALLAS-FT WORTH", "weight": 243 }, { "fieldValue": "ST LOUIS", "weight": 230 }, { "fieldValue": "WASHINGTON", "weight": 228 }, { "fieldValue": "SEATTLE", "weight": 202 }, { "fieldValue": "ATLANTA", "weight": 199 }, { "fieldValue": "HOUSTON", "weight": 189 }, { "fieldValue": "KANSAS CITY", "weight": 183 } ] }, { "fieldName": "c_ldg_rts", "values": [ { "fieldValue": null, "weight": 3846 }, { "fieldValue": "N", "weight": 1085 }, { "fieldValue": "Y", "weight": 69 } ] }, { "fieldName": "cbd_dir", "values": [ { "fieldValue": "N", "weight": 901 }, { "fieldValue": "SE", "weight": 636 }, { "fieldValue": "NW", "weight": 626 }, { "fieldValue": "NE", "weight": 606 }, { "fieldValue": "SW", "weight": 605 }, { "fieldValue": "W", "weight": 550 }, { "fieldValue": "S", "weight": 541 }, { "fieldValue": "E", "weight": 491 }, { "fieldValue": null, "weight": 26 }, { "fieldValue": "NNW", "weight": 4 } ] }, { "fieldName": "cbd_dist", "values": [ { "fieldValue": "0 to 57", "weight": 5000 } ] }, { "fieldName": "cert", "values": [ { "fieldValue": null, "weight": 4845 }, { "fieldValue": "AS 05/1973", "weight": 20 }, { "fieldValue": "CS 05/1973", "weight": 20 }, { "fieldValue": "BS 05/1973", "weight": 17 }, { "fieldValue": "LU 08/1990", "weight": 8 }, { "fieldValue": "AU 05/1973", "weight": 8 }, { "fieldValue": "ES 05/1973", "weight": 7 }, { "fieldValue": "LU 05/1973", "weight": 6 }, { "fieldValue": "DS 05/1973", "weight": 4 }, { "fieldValue": "AU 04/1975", "weight": 3 } ] }, { "fieldName": "city", "values": [ { "fieldValue": "HOUSTON", "weight": 33 }, { "fieldValue": "SPRINGFIELD", "weight": 15 }, { "fieldValue": "LOS ANGELES", "weight": 15 }, { "fieldValue": "SALEM", "weight": 13 }, { "fieldValue": "FORT WORTH", "weight": 13 }, { "fieldValue": "GREENVILLE", "weight": 13 }, { "fieldValue": "NASHVILLE", "weight": 10 }, { "fieldValue": "PLYMOUTH", "weight": 10 }, { "fieldValue": "INDIANAPOLIS", "weight": 10 }, { "fieldValue": "COLUMBUS", "weight": 9 } ] }, { "fieldName": "cntl_twr", "values": [ { "fieldValue": "N", "weight": 4829 }, { "fieldValue": "Y", "weight": 171 } ] }, { "fieldName": "code", "values": [ { "fieldValue": "PKB", "weight": 1 }, { "fieldValue": "43B", "weight": 1 }, { "fieldValue": "7W4", "weight": 1 }, { "fieldValue": "T61", "weight": 1 }, { "fieldValue": "83OK", "weight": 1 }, { "fieldValue": "ME77", "weight": 1 }, { "fieldValue": "07MO", "weight": 1 }, { "fieldValue": "4MA8", "weight": 1 }, { "fieldValue": "1LA3", "weight": 1 }, { "fieldValue": "3LL0", "weight": 1 } ] }, { "fieldName": "county", "values": [ { "fieldValue": "WASHINGTON", "weight": 59 }, { "fieldValue": "LOS ANGELES", "weight": 50 }, { "fieldValue": "JEFFERSON", "weight": 47 }, { "fieldValue": "MONTGOMERY", "weight": 41 }, { "fieldValue": "JACKSON", "weight": 39 }, { "fieldValue": "FRANKLIN", "weight": 38 }, { "fieldValue": "MARION", "weight": 33 }, { "fieldValue": "HARRIS", "weight": 31 }, { "fieldValue": "ADAMS", "weight": 30 }, { "fieldValue": "ORANGE", "weight": 28 } ] }, { "fieldName": "cust_intl", "values": [ { "fieldValue": null, "weight": 3846 }, { "fieldValue": "N", "weight": 1144 }, { "fieldValue": "Y", "weight": 10 } ] }, { "fieldName": "elevation", "values": [ { "fieldValue": "-3 to 10603", "weight": 5000 } ] }, { "fieldName": "faa_dist", "values": [ { "fieldValue": "NONE", "weight": 1796 }, { "fieldValue": "CHI", "weight": 400 }, { "fieldValue": "DET", "weight": 321 }, { "fieldValue": "SEA", "weight": 290 }, { "fieldValue": "ATL", "weight": 271 }, { "fieldValue": "NYC", "weight": 250 }, { "fieldValue": "MSP", "weight": 245 }, { "fieldValue": "ORL", "weight": 235 }, { "fieldValue": "HAR", "weight": 203 }, { "fieldValue": "DEN", "weight": 168 } ] }, { "fieldName": "faa_region", "values": [ { "fieldValue": "AGL", "weight": 1104 }, { "fieldValue": "ASW", "weight": 836 }, { "fieldValue": "ASO", "weight": 784 }, { "fieldValue": "AEA", "weight": 646 }, { "fieldValue": "ANM", "weight": 515 }, { "fieldValue": "ACE", "weight": 400 }, { "fieldValue": "AWP", "weight": 363 }, { "fieldValue": "ANE", "weight": 206 }, { "fieldValue": "AAL", "weight": 143 }, { "fieldValue": null, "weight": 3 } ] }, { "fieldName": "fac_type", "values": [ { "fieldValue": "AIRPORT", "weight": 3522 }, { "fieldValue": "HELIPORT", "weight": 1294 }, { "fieldValue": "SEAPLANE BASE", "weight": 120 }, { "fieldValue": "ULTRALIGHT", "weight": 33 }, { "fieldValue": "STOLPORT", "weight": 16 }, { "fieldValue": "GLIDERPORT", "weight": 12 }, { "fieldValue": "BALLOONPORT", "weight": 3 } ] }, { "fieldName": "fac_use", "values": [ { "fieldValue": "PR", "weight": 3649 }, { "fieldValue": "PU", "weight": 1351 } ] }, { "fieldName": "fed_agree", "values": [ { "fieldValue": null, "weight": 4120 }, { "fieldValue": "NGY", "weight": 415 }, { "fieldValue": "N", "weight": 132 }, { "fieldValue": "NGY3", "weight": 58 }, { "fieldValue": "NY1", "weight": 39 }, { "fieldValue": "NGPY", "weight": 36 }, { "fieldValue": "1", "weight": 33 }, { "fieldValue": "N1", "weight": 26 }, { "fieldValue": "NGSY", "weight": 19 }, { "fieldValue": "NGPY3", "weight": 18 } ] }, { "fieldName": "full_name", "values": [ { "fieldValue": "SMITH", "weight": 7 }, { "fieldValue": "ST FRANCIS HOSPITAL", "weight": 4 }, { "fieldValue": "JOHNSON", "weight": 4 }, { "fieldValue": "SMITH FIELD", "weight": 4 }, { "fieldValue": "MEMORIAL HOSPITAL", "weight": 4 }, { "fieldValue": "SUMMIT", "weight": 3 }, { "fieldValue": "MERCY HOSPITAL", "weight": 3 }, { "fieldValue": "WILLIAMS", "weight": 3 }, { "fieldValue": "ST JOSEPH HOSPITAL", "weight": 3 }, { "fieldValue": "VA MEDICAL CENTER", "weight": 3 } ] }, { "fieldName": "id", "values": [ { "fieldValue": "1 to 19783", "weight": 5000 } ] }, { "fieldName": "joint_use", "values": [ { "fieldValue": null, "weight": 3751 }, { "fieldValue": "N", "weight": 1205 }, { "fieldValue": "Y", "weight": 44 } ] }, { "fieldName": "latitude", "values": [ { "fieldValue": "-14.21 to 70.91", "weight": 5000 } ] }, { "fieldName": "longitude", "values": [ { "fieldValue": "-100.0 to 174.11", "weight": 5000 } ] }, { "fieldName": "major", "values": [ { "fieldValue": "N", "weight": 4941 }, { "fieldValue": "Y", "weight": 59 } ] }, { "fieldName": "mil_rts", "values": [ { "fieldValue": null, "weight": 3742 }, { "fieldValue": "Y", "weight": 738 }, { "fieldValue": "N", "weight": 520 } ] }, { "fieldName": "own_type", "values": [ { "fieldValue": "PR", "weight": 3601 }, { "fieldValue": "PU", "weight": 1315 }, { "fieldValue": "MA", "weight": 32 }, { "fieldValue": "MR", "weight": 30 }, { "fieldValue": "MN", "weight": 22 } ] }, { "fieldName": "site_number", "values": [ { "fieldValue": "02798.68*A", "weight": 1 }, { "fieldValue": "11141.*A", "weight": 1 }, { "fieldValue": "07555.11*H", "weight": 1 }, { "fieldValue": "11713.*A", "weight": 1 }, { "fieldValue": "12143.7*U", "weight": 1 }, { "fieldValue": "21299.05*H", "weight": 1 }, { "fieldValue": "09025.*H", "weight": 1 }, { "fieldValue": "24693.34*H", "weight": 1 }, { "fieldValue": "13070.*A", "weight": 1 }, { "fieldValue": "03006.42*H", "weight": 1 } ] }, { "fieldName": "state", "values": [ { "fieldValue": "TX", "weight": 471 }, { "fieldValue": "IL", "weight": 239 }, { "fieldValue": "CA", "weight": 225 }, { "fieldValue": "FL", "weight": 223 }, { "fieldValue": "PA", "weight": 193 }, { "fieldValue": "OH", "weight": 187 }, { "fieldValue": "IN", "weight": 162 }, { "fieldValue": "NY", "weight": 145 }, { "fieldValue": "AK", "weight": 143 }, { "fieldValue": "MO", "weight": 139 } ] }, { "fieldName": null, "values": [ { "fieldValue": null, "weight": 5000 } ] } ]
WITH __stage0 AS ( SELECT * from (SELECT * FROM '../data/airports.parquet' USING SAMPLE 5000) as x limit 100000 ) , __stage1 AS ( SELECT group_set, CASE group_set WHEN 0 THEN 'act_date' WHEN 1 THEN 'aero_cht' WHEN 2 THEN 'c_ldg_rts' WHEN 3 THEN 'cbd_dir' WHEN 4 THEN 'cbd_dist' WHEN 5 THEN 'cert' WHEN 6 THEN 'city' WHEN 7 THEN 'cntl_twr' WHEN 8 THEN 'code' WHEN 9 THEN 'county' WHEN 10 THEN 'cust_intl' WHEN 11 THEN 'elevation' WHEN 12 THEN 'faa_dist' WHEN 13 THEN 'faa_region' WHEN 14 THEN 'fac_type' WHEN 15 THEN 'fac_use' WHEN 16 THEN 'fed_agree' WHEN 17 THEN 'full_name' WHEN 18 THEN 'id' WHEN 19 THEN 'joint_use' WHEN 20 THEN 'latitude' WHEN 21 THEN 'longitude' WHEN 22 THEN 'major' WHEN 23 THEN 'mil_rts' WHEN 24 THEN 'own_type' WHEN 25 THEN 'site_number' WHEN 26 THEN 'state' END as "fieldName", CASE group_set WHEN 0 THEN 'act_date' WHEN 1 THEN 'aero_cht' WHEN 2 THEN 'c_ldg_rts' WHEN 3 THEN 'cbd_dir' WHEN 4 THEN 'cbd_dist' WHEN 5 THEN 'cert' WHEN 6 THEN 'city' WHEN 7 THEN 'cntl_twr' WHEN 8 THEN 'code' WHEN 9 THEN 'county' WHEN 10 THEN 'cust_intl' WHEN 11 THEN 'elevation' WHEN 12 THEN 'faa_dist' WHEN 13 THEN 'faa_region' WHEN 14 THEN 'fac_type' WHEN 15 THEN 'fac_use' WHEN 16 THEN 'fed_agree' WHEN 17 THEN 'full_name' WHEN 18 THEN 'id' WHEN 19 THEN 'joint_use' WHEN 20 THEN 'latitude' WHEN 21 THEN 'longitude' WHEN 22 THEN 'major' WHEN 23 THEN 'mil_rts' WHEN 24 THEN 'own_type' WHEN 25 THEN 'site_number' WHEN 26 THEN 'state' END as "fieldPath", CASE group_set WHEN 0 THEN 'string' WHEN 1 THEN 'string' WHEN 2 THEN 'string' WHEN 3 THEN 'string' WHEN 4 THEN 'number' WHEN 5 THEN 'string' WHEN 6 THEN 'string' WHEN 7 THEN 'string' WHEN 8 THEN 'string' WHEN 9 THEN 'string' WHEN 10 THEN 'string' WHEN 11 THEN 'number' WHEN 12 THEN 'string' WHEN 13 THEN 'string' WHEN 14 THEN 'string' WHEN 15 THEN 'string' WHEN 16 THEN 'string' WHEN 17 THEN 'string' WHEN 18 THEN 'number' WHEN 19 THEN 'string' WHEN 20 THEN 'number' WHEN 21 THEN 'number' WHEN 22 THEN 'string' WHEN 23 THEN 'string' WHEN 24 THEN 'string' WHEN 25 THEN 'string' WHEN 26 THEN 'string' END as "fieldType", CASE group_set WHEN 99999 THEN CAST(NULL as VARCHAR) WHEN 0 THEN base."act_date" WHEN 1 THEN base."aero_cht" WHEN 2 THEN base."c_ldg_rts" WHEN 3 THEN base."cbd_dir" WHEN 5 THEN base."cert" WHEN 6 THEN base."city" WHEN 7 THEN base."cntl_twr" WHEN 8 THEN base."code" WHEN 9 THEN base."county" WHEN 10 THEN base."cust_intl" WHEN 12 THEN base."faa_dist" WHEN 13 THEN base."faa_region" WHEN 14 THEN base."fac_type" WHEN 15 THEN base."fac_use" WHEN 16 THEN base."fed_agree" WHEN 17 THEN base."full_name" WHEN 19 THEN base."joint_use" WHEN 22 THEN base."major" WHEN 23 THEN base."mil_rts" WHEN 24 THEN base."own_type" WHEN 25 THEN base."site_number" WHEN 26 THEN base."state" END as "fieldValue", COUNT(*) as "weight", CASE group_set WHEN 99999 THEN '' WHEN 4 THEN MIN(CAST(base."cbd_dist" as VARCHAR)) || ' to ' || CAST(MAX(base."cbd_dist") as VARCHAR) WHEN 11 THEN MIN(CAST(base."elevation" as VARCHAR)) || ' to ' || CAST(MAX(base."elevation") as VARCHAR) WHEN 18 THEN MIN(CAST(base."id" as VARCHAR)) || ' to ' || CAST(MAX(base."id") as VARCHAR) WHEN 20 THEN MIN(CAST(base."latitude" as VARCHAR)) || ' to ' || CAST(MAX(base."latitude") as VARCHAR) WHEN 21 THEN MIN(CAST(base."longitude" as VARCHAR)) || ' to ' || CAST(MAX(base."longitude") as VARCHAR) END as "fieldRange" FROM __stage0 as base CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,27,1)) as group_set ) as group_set GROUP BY 1,2,3,4,5 ) , __stage2 AS ( SELECT "fieldName", "fieldPath", "fieldType", COALESCE("fieldValue", "fieldRange") as "fieldValue", "weight" FROM __stage1 ) , __stage3 AS ( SELECT group_set, base."fieldName" as "fieldName__0", CASE WHEN group_set=1 THEN base."fieldValue" END as "fieldValue__1", CASE WHEN group_set=1 THEN base."weight" END as "weight__1" FROM __stage2 as base CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set ) as group_set GROUP BY 1,2,3,4 ) SELECT "fieldName__0" as "fieldName", COALESCE(LIST({ "fieldValue": "fieldValue__1", "weight": "weight__1"} ORDER BY "weight__1" desc NULLS LAST) FILTER (WHERE group_set=1)[1:10],[]) as "values" FROM __stage3 GROUP BY 1 ORDER BY 1 ASC NULLS LAST
A More Complex Example
The rest of this pages uses the model below. The data is an excerpt from the IMDB. The Malloy schema for this model is shown on the right. The core value is movies, but joined at the principals (the people that worked on the movie) and the people (the actual data about the individuals).
We use the measure total_ratings
to determin a movie's popularity. An individual's popularity is determined by the some of all the ratings of the movies a person has worked on.
source: movies is duckdb.table('../data/titles.parquet') extend { join_many: principals is duckdb.table('../data/principals.parquet') extend { join_one: people is duckdb.table('../data/names.parquet') on nconst = people.nconst } on tconst = principals.tconst measure: total_ratings is numVotes.sum() }
Weights can be any measure
Often a row count will work nicely as a weight, but sometimes there is something better. In movies, for example the sum of the number of votes will not only find the interesting most interesting movies but will also find the most interesting people. For example the most interesting people in the dataset.
run: movies -> { group_by: principals.people.primaryName aggregate: total_ratings }
[ { "primaryName": "Brad Pitt", "total_ratings": 18269386 }, { "primaryName": "Stan Lee", "total_ratings": 18229893 }, { "primaryName": "John Williams", "total_ratings": 17651379 }, { "primaryName": "Leonardo DiCaprio", "total_ratings": 17032662 }, { "primaryName": "Tom Hanks", "total_ratings": 16650835 }, { "primaryName": "Christopher Nolan", "total_ratings": 15368415 }, { "primaryName": "Steven Spielberg", "total_ratings": 15069082 }, { "primaryName": "Robert De Niro", "total_ratings": 14629861 }, { "primaryName": "Christian Bale", "total_ratings": 13730681 }, { "primaryName": "Samuel L. Jackson", "total_ratings": 13423134 }, { "primaryName": "Robert Downey Jr.", "total_ratings": 12685331 }, { "primaryName": "George Lucas", "total_ratings": 12584331 }, { "primaryName": "Matt Damon", "total_ratings": 12509393 }, { "primaryName": "Quentin Tarantino", "total_ratings": 12348394 }, { "primaryName": "Bruce Willis", "total_ratings": 12045753 }, { "primaryName": "Jack Kirby", "total_ratings": 11901244 }, { "primaryName": "Johnny Depp", "total_ratings": 11755228 }, { "primaryName": "Morgan Freeman", "total_ratings": 11435905 }, { "primaryName": "Tom Cruise", "total_ratings": 11290573 }, { "primaryName": "Thomas Newman", "total_ratings": 11056371 }, { "primaryName": "Hans Zimmer", "total_ratings": 10957053 }, { "primaryName": "Scott Rudin", "total_ratings": 10793692 }, { "primaryName": "Charles Roven", "total_ratings": 10778794 }, { "primaryName": "Bob Kane", "total_ratings": 10685405 }, { "primaryName": "Ian McKellen", "total_ratings": 10552024 }, { "primaryName": "Chris Evans", "total_ratings": 10497785 }, { "primaryName": "Harrison Ford", "total_ratings": 10449365 }, { "primaryName": "Scarlett Johansson", "total_ratings": 10128373 }, { "primaryName": "David S. Goyer", "total_ratings": 10022517 }, { "primaryName": "Jonathan Nolan", "total_ratings": 9823211 }, { "primaryName": "Martin Scorsese", "total_ratings": 9692050 }, { "primaryName": "Peter Jackson", "total_ratings": 9612306 }, { "primaryName": "Orlando Bloom", "total_ratings": 9482690 }, { "primaryName": "James Newton Howard", "total_ratings": 9473293 }, { "primaryName": "Stephen King", "total_ratings": 9454020 }, { "primaryName": "Hugh Jackman", "total_ratings": 9409702 }, { "primaryName": "Kevin Feige", "total_ratings": 9408976 }, { "primaryName": "Ridley Scott", "total_ratings": 9334292 }, { "primaryName": "Natalie Portman", "total_ratings": 9206903 }, { "primaryName": "Liam Neeson", "total_ratings": 9185450 }, { "primaryName": "David Heyman", "total_ratings": 9174472 }, { "primaryName": "Mark Ruffalo", "total_ratings": 9171115 }, { "primaryName": "Bradley Cooper", "total_ratings": 8980668 }, { "primaryName": "Will Smith", "total_ratings": 8960562 }, { "primaryName": "Al Pacino", "total_ratings": 8933776 }, { "primaryName": "Emma Thomas", "total_ratings": 8773124 }, { "primaryName": "Tim Bevan", "total_ratings": 8756327 }, { "primaryName": "Roger Deakins", "total_ratings": 8737852 }, { "primaryName": "Fran Walsh", "total_ratings": 8594296 }, { "primaryName": "Michael Caine", "total_ratings": 8533028 }, { "primaryName": "Lorne Orleans", "total_ratings": 8518672 }, { "primaryName": "Lawrence Bender", "total_ratings": 8503500 }, { "primaryName": "Keanu Reeves", "total_ratings": 8480149 }, { "primaryName": "David Fincher", "total_ratings": 8427910 }, { "primaryName": "Mark Wahlberg", "total_ratings": 8393193 }, { "primaryName": "Joel Silver", "total_ratings": 8360819 }, { "primaryName": "Robert Richardson", "total_ratings": 8351268 }, { "primaryName": "Philippa Boyens", "total_ratings": 8336233 }, { "primaryName": "Kathleen Kennedy", "total_ratings": 8278785 }, { "primaryName": "Matthew McConaughey", "total_ratings": 8271368 }, { "primaryName": "Ben Affleck", "total_ratings": 8268036 }, { "primaryName": "James Horner", "total_ratings": 8253828 }, { "primaryName": "Alan Silvestri", "total_ratings": 8072953 }, { "primaryName": "Chris Hemsworth", "total_ratings": 8058188 }, { "primaryName": "Carter Burwell", "total_ratings": 7997536 }, { "primaryName": "Simon Kinberg", "total_ratings": 7846138 }, { "primaryName": "Eric Fellner", "total_ratings": 7845245 }, { "primaryName": "J.R.R. Tolkien", "total_ratings": 7798667 }, { "primaryName": "Jake Gyllenhaal", "total_ratings": 7762048 }, { "primaryName": "Jerry Bruckheimer", "total_ratings": 7721877 }, { "primaryName": "Edward Norton", "total_ratings": 7713816 }, { "primaryName": "Woody Harrelson", "total_ratings": 7711063 }, { "primaryName": "Ewan McGregor", "total_ratings": 7655458 }, { "primaryName": "George Clooney", "total_ratings": 7653368 }, { "primaryName": "David Benioff", "total_ratings": 7637309 }, { "primaryName": "Kevin Spacey", "total_ratings": 7603094 }, { "primaryName": "Brian Grazer", "total_ratings": 7570625 }, { "primaryName": "Anne Hathaway", "total_ratings": 7509220 }, { "primaryName": "Arnon Milchan", "total_ratings": 7494607 }, { "primaryName": "Jim Carrey", "total_ratings": 7421247 }, { "primaryName": "Robert Zemeckis", "total_ratings": 7371523 }, { "primaryName": "Danny Elfman", "total_ratings": 7340555 }, { "primaryName": "Ryan Reynolds", "total_ratings": 7319537 }, { "primaryName": "Jason Blum", "total_ratings": 7308653 }, { "primaryName": "James Cameron", "total_ratings": 7306015 }, { "primaryName": "Sally Menke", "total_ratings": 7239769 }, { "primaryName": "Tom Hardy", "total_ratings": 7213428 }, { "primaryName": "Clint Eastwood", "total_ratings": 7209434 }, { "primaryName": "Russell Crowe", "total_ratings": 7166573 }, { "primaryName": "Akiva Goldsman", "total_ratings": 7094797 }, { "primaryName": "Nicolas Cage", "total_ratings": 7053952 }, { "primaryName": "Michael Giacchino", "total_ratings": 7047025 }, { "primaryName": "Gary Oldman", "total_ratings": 7036991 }, { "primaryName": "Cate Blanchett", "total_ratings": 7017425 }, { "primaryName": "Michael Bay", "total_ratings": 7016200 }, { "primaryName": "J.J. Abrams", "total_ratings": 6991513 }, { "primaryName": "Willem Dafoe", "total_ratings": 6924815 }, { "primaryName": "Christopher Markus", "total_ratings": 6886318 }, { "primaryName": "Stephen McFeely", "total_ratings": 6886318 }, { "primaryName": "Neal H. Moritz", "total_ratings": 6870329 }, { "primaryName": "Alexandre Desplat", "total_ratings": 6867309 }, { "primaryName": "Jennifer Lawrence", "total_ratings": 6855625 }, { "primaryName": "Rachel McAdams", "total_ratings": 6777091 }, { "primaryName": "Lena Headey", "total_ratings": 6742472 }, { "primaryName": "Elijah Wood", "total_ratings": 6734358 }, { "primaryName": "Dwayne Johnson", "total_ratings": 6584416 }, { "primaryName": "Jason Statham", "total_ratings": 6582293 }, { "primaryName": "Steve Kloves", "total_ratings": 6559963 }, { "primaryName": "Denzel Washington", "total_ratings": 6556554 }, { "primaryName": "Sigourney Weaver", "total_ratings": 6515096 }, { "primaryName": "Arnold Schwarzenegger", "total_ratings": 6482925 }, { "primaryName": "Tim Burton", "total_ratings": 6467281 }, { "primaryName": "Steve Carell", "total_ratings": 6436581 }, { "primaryName": "Terry Rossio", "total_ratings": 6423369 }, { "primaryName": "Ceán Chaffin", "total_ratings": 6415865 }, { "primaryName": "Amy Adams", "total_ratings": 6401556 }, { "primaryName": "Daniel Radcliffe", "total_ratings": 6393663 }, { "primaryName": "J.K. Rowling", "total_ratings": 6392020 }, { "primaryName": "Mel Gibson", "total_ratings": 6322650 }, { "primaryName": "Anthony Hopkins", "total_ratings": 6315913 }, { "primaryName": "Emma Watson", "total_ratings": 6304351 }, { "primaryName": null, "total_ratings": 6300267 }, { "primaryName": "Rick McCallum", "total_ratings": 6211607 }, { "primaryName": "Jack Nicholson", "total_ratings": 6180622 }, { "primaryName": "Ted Elliott", "total_ratings": 6136354 }, { "primaryName": "Adam Sandler", "total_ratings": 6094935 }, { "primaryName": "Ed Harris", "total_ratings": 6077615 }, { "primaryName": "Chris Columbus", "total_ratings": 6076896 }, { "primaryName": "Cameron Diaz", "total_ratings": 6042333 }, { "primaryName": "Joseph Gordon-Levitt", "total_ratings": 6034397 }, { "primaryName": "Emilia Clarke", "total_ratings": 5995245 }, { "primaryName": "D.B. Weiss", "total_ratings": 5972128 }, { "primaryName": "Joaquin Phoenix", "total_ratings": 5967875 }, { "primaryName": "Daniel Craig", "total_ratings": 5967704 }, { "primaryName": "Luc Besson", "total_ratings": 5961182 }, { "primaryName": "Ryan Gosling", "total_ratings": 5956376 }, { "primaryName": "Vin Diesel", "total_ratings": 5944056 }, { "primaryName": "Andrew Stanton", "total_ratings": 5942167 }, { "primaryName": "John Goodman", "total_ratings": 5940986 }, { "primaryName": "Ralph Fiennes", "total_ratings": 5906715 }, { "primaryName": "Peter Dinklage", "total_ratings": 5906118 }, { "primaryName": "David Koepp", "total_ratings": 5894926 }, { "primaryName": "Pete Docter", "total_ratings": 5862141 }, { "primaryName": "Sylvester Stallone", "total_ratings": 5861981 }, { "primaryName": "Angelina Jolie", "total_ratings": 5857481 }, { "primaryName": "Ian Bryce", "total_ratings": 5848379 }, { "primaryName": "Hugo Weaving", "total_ratings": 5841890 }, { "primaryName": "Frank Darabont", "total_ratings": 5826908 }, { "primaryName": "Ben Kingsley", "total_ratings": 5825798 }, { "primaryName": "Viggo Mortensen", "total_ratings": 5823719 }, { "primaryName": "Richard Francis-Bruce", "total_ratings": 5798818 }, { "primaryName": "Colin Farrell", "total_ratings": 5788835 }, { "primaryName": "Charlize Theron", "total_ratings": 5765794 }, { "primaryName": "Joss Whedon", "total_ratings": 5757097 }, { "primaryName": "Nikolaj Coster-Waldau", "total_ratings": 5749486 }, { "primaryName": "Julianne Moore", "total_ratings": 5744632 }, { "primaryName": "Jonah Hill", "total_ratings": 5718674 }, { "primaryName": "Joel Coen", "total_ratings": 5700458 }, { "primaryName": "Zack Snyder", "total_ratings": 5695090 }, { "primaryName": "Ben Stiller", "total_ratings": 5685709 }, { "primaryName": "Bryan Cranston", "total_ratings": 5685238 }, { "primaryName": "Kate Winslet", "total_ratings": 5682918 }, { "primaryName": "Ethan Coen", "total_ratings": 5663547 }, { "primaryName": "Walter F. Parkes", "total_ratings": 5657162 }, { "primaryName": "Howard Shore", "total_ratings": 5638214 }, { "primaryName": "Jeremy Renner", "total_ratings": 5623966 }, { "primaryName": "Marco Beltrami", "total_ratings": 5590317 }, { "primaryName": "Bryan Singer", "total_ratings": 5590237 }, { "primaryName": "Emma Stone", "total_ratings": 5583422 }, { "primaryName": "Guillermo del Toro", "total_ratings": 5570240 }, { "primaryName": "Owen Wilson", "total_ratings": 5556539 }, { "primaryName": "Rupert Grint", "total_ratings": 5552590 }, { "primaryName": "Jude Law", "total_ratings": 5538884 }, { "primaryName": "Matthew Vaughn", "total_ratings": 5524297 }, { "primaryName": "Alex Kurtzman", "total_ratings": 5522344 }, { "primaryName": "Lana Wachowski", "total_ratings": 5518169 }, { "primaryName": "Keira Knightley", "total_ratings": 5512152 }, { "primaryName": "Lilly Wachowski", "total_ratings": 5444217 }, { "primaryName": "Judd Apatow", "total_ratings": 5443590 }, { "primaryName": "Chris Pratt", "total_ratings": 5439043 }, { "primaryName": "Francis Ford Coppola", "total_ratings": 5436313 }, { "primaryName": "Will Ferrell", "total_ratings": 5420626 }, { "primaryName": "Seth Rogen", "total_ratings": 5419570 }, { "primaryName": "Sandra Bullock", "total_ratings": 5405221 }, { "primaryName": "Steve Ditko", "total_ratings": 5371488 }, { "primaryName": "Frank Marshall", "total_ratings": 5363210 }, { "primaryName": "John Logan", "total_ratings": 5353165 }, { "primaryName": "Ken Watanabe", "total_ratings": 5352344 }, { "primaryName": "Sam Raimi", "total_ratings": 5334765 }, { "primaryName": "Carrie-Anne Moss", "total_ratings": 5305827 }, { "primaryName": "Benedict Cumberbatch", "total_ratings": 5273615 }, { "primaryName": "Robin Williams", "total_ratings": 5224218 }, { "primaryName": "Jennifer Connelly", "total_ratings": 5222568 }, { "primaryName": "Ron Howard", "total_ratings": 5215903 }, { "primaryName": "John Travolta", "total_ratings": 5206564 }, { "primaryName": "Laurence Fishburne", "total_ratings": 5187514 }, { "primaryName": "Ethan Hawke", "total_ratings": 5186955 }, { "primaryName": "Jamie Foxx", "total_ratings": 5180837 }, { "primaryName": "Lawrence Kasdan", "total_ratings": 5173484 }, { "primaryName": "Gerard Butler", "total_ratings": 5156049 } ]
SELECT people_0."primaryName" as "primaryName", COALESCE(( SELECT SUM(a.val) as value FROM ( SELECT UNNEST(list(distinct {key:base."__distinct_key", val: base."numVotes"})) a ) ),0) as "total_ratings" FROM (SELECT GEN_RANDOM_UUID() as "__distinct_key", x.* FROM '../data/titles.parquet' as x) as base LEFT JOIN '../data/principals.parquet' AS principals_0 ON base."tconst"=principals_0."tconst" LEFT JOIN '../data/names.parquet' AS people_0 ON principals_0."nconst"=people_0."nconst" GROUP BY 1 ORDER BY 2 desc NULLS LAST
Index the entire graph
Indexing can work across an entire network of joins and can be selective.
run: movies -> { index: * genres.* principals.category, principals.job principals.characters.* principals.people.primaryName by total_ratings sample: 5000 } -> { select: * order_by: weight desc }
[ { "fieldName": "averageRating", "fieldPath": "averageRating", "fieldType": "number", "fieldValue": "1.0 to 10.0", "weight": 712945831 }, { "fieldName": "endYear", "fieldPath": "endYear", "fieldType": "number", "fieldValue": "1966.0 to 2024.0", "weight": 712945831 }, { "fieldName": "isAdult", "fieldPath": "isAdult", "fieldType": "string", "fieldValue": "0", "weight": 712945831 }, { "fieldName": "numVotes", "fieldPath": "numVotes", "fieldType": "number", "fieldValue": "100038.0 to 2755400.0", "weight": 712945831 }, { "fieldName": "runtimeMinutes", "fieldPath": "runtimeMinutes", "fieldType": "number", "fieldValue": "100.0 to 780.0", "weight": 712945831 }, { "fieldName": "startYear", "fieldPath": "startYear", "fieldType": "number", "fieldValue": "1902.0 to 2023.0", "weight": 712945831 }, { "fieldName": null, "fieldPath": null, "fieldType": null, "fieldValue": null, "weight": 712945831 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "\\N", "weight": 708365888 }, { "fieldName": "principals.category", "fieldPath": "principals/category", "fieldType": "string", "fieldValue": "actor", "weight": 699958672 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "\\N", "weight": 697974623 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "\\N", "weight": 697974623 }, { "fieldName": "principals.category", "fieldPath": "principals/category", "fieldType": "string", "fieldValue": "director", "weight": 618272415 }, { "fieldName": "principals.category", "fieldPath": "principals/category", "fieldType": "string", "fieldValue": "actress", "weight": 603224002 }, { "fieldName": "principals.category", "fieldPath": "principals/category", "fieldType": "string", "fieldValue": "writer", "weight": 600498587 }, { "fieldName": "principals.category", "fieldPath": "principals/category", "fieldType": "string", "fieldValue": "producer", "weight": 532573254 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "producer", "weight": 530506175 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Drama", "weight": 368832492 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Drama", "weight": 368832492 }, { "fieldName": "principals.category", "fieldPath": "principals/category", "fieldType": "string", "fieldValue": "composer", "weight": 322250648 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Action", "weight": 265899891 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Action", "weight": 265899891 }, { "fieldName": "principals.category", "fieldPath": "principals/category", "fieldType": "string", "fieldValue": "cinematographer", "weight": 226185350 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Adventure", "weight": 218408961 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Adventure", "weight": 218408961 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Comedy", "weight": 202353777 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Comedy", "weight": 202353777 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "director of photography", "weight": 172470517 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "screenplay", "weight": 171621240 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "written by", "weight": 167255627 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Crime", "weight": 141846566 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Crime", "weight": 141846566 }, { "fieldName": "principals.category", "fieldPath": "principals/category", "fieldType": "string", "fieldValue": "editor", "weight": 128423200 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Thriller", "weight": 117097901 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Thriller", "weight": 117097901 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Sci-Fi", "weight": 102896335 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Sci-Fi", "weight": 102896335 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "screenplay by", "weight": 100443326 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Mystery", "weight": 81617717 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Mystery", "weight": 81617717 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Romance", "weight": 73953338 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Romance", "weight": 73953338 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Fantasy", "weight": 69635224 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Fantasy", "weight": 69635224 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Horror", "weight": 63013834 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Horror", "weight": 63013834 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "story", "weight": 51389834 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Animation", "weight": 49865978 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Animation", "weight": 49865978 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Biography", "weight": 45936668 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Biography", "weight": 45936668 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "created by", "weight": 41230023 }, { "fieldName": "principals.category", "fieldPath": "principals/category", "fieldType": "string", "fieldValue": "production_designer", "weight": 38857006 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "novel", "weight": 35684022 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Family", "weight": 30586449 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Family", "weight": 30586449 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "story by", "weight": 30566713 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "characters", "weight": 22161268 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "History", "weight": 19028585 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "History", "weight": 19028585 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "based on the novel by", "weight": 18578725 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Stan Lee", "weight": 16197439 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "War", "weight": 15503426 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "War", "weight": 15503426 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Leonardo DiCaprio", "weight": 14462788 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "book", "weight": 14455916 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Pitt", "weight": 13749986 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Tom Hanks", "weight": 13346553 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Steven Spielberg", "weight": 12658150 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "based on characters created by", "weight": 12232626 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "John Williams", "weight": 12009635 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Robert Downey Jr.", "weight": 11597291 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "co-director", "weight": 11145189 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Music", "weight": 10673927 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Music", "weight": 10673927 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Jack Kirby", "weight": 10307671 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Morgan Freeman", "weight": 9988888 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "based on the Marvel comics by", "weight": 9914121 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "film editor", "weight": 9866042 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Quentin Tarantino", "weight": 9742900 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "developed by", "weight": 9634844 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Robert De Niro", "weight": 9570135 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Scott Rudin", "weight": 9451282 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Johnny Depp", "weight": 9290150 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Christopher Nolan", "weight": 9210343 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Sport", "weight": 9177999 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Sport", "weight": 9177999 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Chris Evans", "weight": 9134094 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Tony Stark", "weight": 9044642 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Tony Stark", "weight": 9044642 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Orlando Bloom", "weight": 8933010 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Peter Jackson", "weight": 8783538 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Matt Damon", "weight": 8730809 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Kevin Feige", "weight": 8541897 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Samuel L. Jackson", "weight": 8426342 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Roger Deakins", "weight": 8381858 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Ridley Scott", "weight": 8188551 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Hans Zimmer", "weight": 8140667 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Christian Bale", "weight": 8122723 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "based on the book by", "weight": 7941704 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "George Lucas", "weight": 7858414 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Fran Walsh", "weight": 7813660 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Scarlett Johansson", "weight": 7799809 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "David Fincher", "weight": 7758046 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Ian McKellen", "weight": 7745053 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Mark Ruffalo", "weight": 7659063 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Philippa Boyens", "weight": 7655853 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Joel Silver", "weight": 7560074 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Robert Richardson", "weight": 7511473 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Thomas Newman", "weight": 7470001 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "David Heyman", "weight": 7331291 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Chris Hemsworth", "weight": 7186670 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Natalie Portman", "weight": 7087334 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Jerry Bruckheimer", "weight": 7037693 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "J.R.R. Tolkien", "weight": 7033044 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Tom Hardy", "weight": 7003964 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brian Grazer", "weight": 6982619 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Gandalf", "weight": 6950661 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Gandalf", "weight": 6950661 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Jake Gyllenhaal", "weight": 6916495 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Martin Scorsese", "weight": 6905744 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Emma Thomas", "weight": 6851215 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Russell Crowe", "weight": 6812990 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Tom Cruise", "weight": 6785419 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "screen story", "weight": 6718563 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bruce Willis", "weight": 6617346 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Robert Zemeckis", "weight": 6615810 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Hugh Jackman", "weight": 6606626 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "David Benioff", "weight": 6577664 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Keanu Reeves", "weight": 6574189 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Elijah Wood", "weight": 6547409 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Captain America", "weight": 6543111 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Captain America", "weight": 6543111 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Narrator", "weight": 6539078 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Narrator", "weight": 6539078 }, { "fieldName": "genres.each", "fieldPath": "genres/each", "fieldType": "string", "fieldValue": "Western", "weight": 6486928 }, { "fieldName": "genres.value", "fieldPath": "genres/value", "fieldType": "string", "fieldValue": "Western", "weight": 6486928 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Alan Silvestri", "weight": 6466422 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradley Cooper", "weight": 6379452 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Mark Wahlberg", "weight": 6330086 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Steve Rogers", "weight": 6308089 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Steve Rogers", "weight": 6308089 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Ewan McGregor", "weight": 6291540 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Iron Man", "weight": 6248345 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Iron Man", "weight": 6248345 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Ben Affleck", "weight": 6241140 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Carter Burwell", "weight": 6220375 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Lena Headey", "weight": 6207532 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Terry Rossio", "weight": 6202426 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "James Newton Howard", "weight": 6149797 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Bruce Wayne", "weight": 6054630 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Bruce Wayne", "weight": 6054630 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Stephen King", "weight": 6047406 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Jason Blum", "weight": 6042765 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Will Smith", "weight": 6037225 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Edward Norton", "weight": 6015503 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Woody Harrelson", "weight": 5996264 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bob Kane", "weight": 5989374 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Ted Elliott", "weight": 5920934 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "George Clooney", "weight": 5899910 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "J.J. Abrams", "weight": 5879907 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Michael Bay", "weight": 5827713 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Gary Oldman", "weight": 5811123 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Jack Nicholson", "weight": 5793053 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Denzel Washington", "weight": 5754259 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Ceán Chaffin", "weight": 5746001 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Liam Neeson", "weight": 5735633 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Arnon Milchan", "weight": 5694804 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Batman", "weight": 5654261 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Batman", "weight": 5654261 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Harrison Ford", "weight": 5653847 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Alexandre Desplat", "weight": 5614470 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Tim Bevan", "weight": 5559040 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Frodo", "weight": 5556178 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Frodo", "weight": 5556178 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Legolas", "weight": 5521522 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Legolas", "weight": 5521522 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Rachel McAdams", "weight": 5482750 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "D.B. Weiss", "weight": 5462163 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Kathleen Kennedy", "weight": 5431561 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Ralph Fiennes", "weight": 5420859 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Ed Harris", "weight": 5389617 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Stephen McFeely", "weight": 5384891 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Christopher Markus", "weight": 5384891 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Emma Watson", "weight": 5383712 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Peter Dinklage", "weight": 5372297 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Nicolas Cage", "weight": 5371034 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "James Cameron", "weight": 5370107 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Charles Roven", "weight": 5299540 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "creator", "weight": 5280266 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Cameron Diaz", "weight": 5277118 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Emilia Clarke", "weight": 5267821 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Thor", "weight": 5227393 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Thor", "weight": 5227393 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Ian Bryce", "weight": 5223482 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Tim Burton", "weight": 5210615 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Keira Knightley", "weight": 5202993 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Jennifer Lawrence", "weight": 5198745 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Nikolaj Coster-Waldau", "weight": 5123241 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Joel Coen", "weight": 5119818 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Ethan Coen", "weight": 5119818 } ]
WITH __stage0 AS ( SELECT * from (SELECT * FROM '../data/titles.parquet' USING SAMPLE 5000) as x limit 100000 ) , __stage1 AS ( SELECT group_set, CASE group_set WHEN 0 THEN 'averageRating' WHEN 1 THEN 'endYear' WHEN 2 THEN 'isAdult' WHEN 3 THEN 'numVotes' WHEN 4 THEN 'originalTitle' WHEN 5 THEN 'primaryTitle' WHEN 6 THEN 'runtimeMinutes' WHEN 7 THEN 'startYear' WHEN 8 THEN 'tconst' WHEN 9 THEN 'genres.each' WHEN 10 THEN 'genres.value' WHEN 11 THEN 'principals.category' WHEN 12 THEN 'principals.job' WHEN 13 THEN 'principals.characters.each' WHEN 14 THEN 'principals.characters.value' WHEN 15 THEN 'principals.people.primaryName' END as "fieldName", CASE group_set WHEN 0 THEN 'averageRating' WHEN 1 THEN 'endYear' WHEN 2 THEN 'isAdult' WHEN 3 THEN 'numVotes' WHEN 4 THEN 'originalTitle' WHEN 5 THEN 'primaryTitle' WHEN 6 THEN 'runtimeMinutes' WHEN 7 THEN 'startYear' WHEN 8 THEN 'tconst' WHEN 9 THEN 'genres/each' WHEN 10 THEN 'genres/value' WHEN 11 THEN 'principals/category' WHEN 12 THEN 'principals/job' WHEN 13 THEN 'principals/characters/each' WHEN 14 THEN 'principals/characters/value' WHEN 15 THEN 'principals/people/primaryName' END as "fieldPath", CASE group_set WHEN 0 THEN 'number' WHEN 1 THEN 'number' WHEN 2 THEN 'string' WHEN 3 THEN 'number' WHEN 4 THEN 'string' WHEN 5 THEN 'string' WHEN 6 THEN 'number' WHEN 7 THEN 'number' WHEN 8 THEN 'string' WHEN 9 THEN 'string' WHEN 10 THEN 'string' WHEN 11 THEN 'string' WHEN 12 THEN 'string' WHEN 13 THEN 'string' WHEN 14 THEN 'string' WHEN 15 THEN 'string' END as "fieldType", CASE group_set WHEN 99999 THEN CAST(NULL as VARCHAR) WHEN 2 THEN base."isAdult" WHEN 4 THEN base."originalTitle" WHEN 5 THEN base."primaryTitle" WHEN 8 THEN base."tconst" WHEN 9 THEN genres_0 WHEN 10 THEN genres_0 WHEN 11 THEN principals_0."category" WHEN 12 THEN principals_0."job" WHEN 13 THEN characters_0 WHEN 14 THEN characters_0 WHEN 15 THEN people_0."primaryName" END as "fieldValue", COALESCE(( SELECT SUM(a.val) as value FROM ( SELECT UNNEST(list(distinct {key:base."__distinct_key", val: base."numVotes"})) a ) ),0) as "weight", CASE group_set WHEN 99999 THEN '' WHEN 0 THEN MIN(CAST(base."averageRating" as VARCHAR)) || ' to ' || CAST(MAX(base."averageRating") as VARCHAR) WHEN 1 THEN MIN(CAST(base."endYear" as VARCHAR)) || ' to ' || CAST(MAX(base."endYear") as VARCHAR) WHEN 3 THEN MIN(CAST(base."numVotes" as VARCHAR)) || ' to ' || CAST(MAX(base."numVotes") as VARCHAR) WHEN 6 THEN MIN(CAST(base."runtimeMinutes" as VARCHAR)) || ' to ' || CAST(MAX(base."runtimeMinutes") as VARCHAR) WHEN 7 THEN MIN(CAST(base."startYear" as VARCHAR)) || ' to ' || CAST(MAX(base."startYear") as VARCHAR) END as "fieldRange" FROM (SELECT GEN_RANDOM_UUID() as "__distinct_key", x.* FROM __stage0 as x) as base LEFT JOIN LATERAL (SELECT UNNEST(base."genres"), 1 as ignoreme) as genres_0_outer(genres_0,ignoreme) ON genres_0_outer.ignoreme=1 LEFT JOIN '../data/principals.parquet' AS principals_0 ON base."tconst"=principals_0."tconst" LEFT JOIN LATERAL (SELECT UNNEST(principals_0."characters"), 1 as ignoreme) as characters_0_outer(characters_0,ignoreme) ON characters_0_outer.ignoreme=1 LEFT JOIN '../data/names.parquet' AS people_0 ON principals_0."nconst"=people_0."nconst" CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,16,1)) as group_set ) as group_set GROUP BY 1,2,3,4,5 ) , __stage2 AS ( SELECT "fieldName", "fieldPath", "fieldType", COALESCE("fieldValue", "fieldRange") as "fieldValue", "weight" FROM __stage1 ) SELECT base."fieldName" as "fieldName", base."fieldPath" as "fieldPath", base."fieldType" as "fieldType", base."fieldValue" as "fieldValue", base."weight" as "weight" FROM __stage2 as base ORDER BY 5 desc NULLS LAST
Search Index Naming Convention
By convention indexes in sources are named search_index
.
source: movies2 is duckdb.table('../data/titles.parquet') extend { join_many: principals is duckdb.table('../data/principals.parquet') extend { join_one: people is duckdb.table('../data/names.parquet') on nconst = people.nconst } on tconst = principals.tconst measure: total_ratings is numVotes.sum() view: search_index is { index: * genres.* principals.category, principals.job principals.characters.* principals.people.primaryName by total_ratings } }
So to look for 'Brad'
run: movies2 -> search_index -> { select: * where: fieldValue ~ 'Brad%' order_by: weight desc }
[ { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Pitt", "weight": 18269386 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradley Cooper", "weight": 8980668 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Bird", "weight": 2783448 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradley J. Fischer", "weight": 2582567 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Fiedel", "weight": 2516157 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Grey", "weight": 1858689 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Fuller", "weight": 1589845 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Garrett", "weight": 989180 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradley Whitford", "weight": 908639 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Anderson", "weight": 785214 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradley Thomas", "weight": 672907 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Dourif", "weight": 595034 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Falchuk", "weight": 594475 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Krevoy", "weight": 593257 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradford Lewis", "weight": 566153 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Peyton", "weight": 565084 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Bellick", "weight": 553782 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Bellick", "weight": 553782 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Silberling", "weight": 552092 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Gurdlinger", "weight": 466826 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Gurdlinger", "weight": 466826 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Ingelsby", "weight": 452543 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Segal", "weight": 402692 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Furman", "weight": 385968 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Mann", "weight": 384595 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Braden Aftergood", "weight": 374772 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Winderbaum", "weight": 366995 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Swaile", "weight": 345759 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Wright", "weight": 278557 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Renfro", "weight": 273848 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Bradley Fine", "weight": 250952 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Bradley Fine", "weight": 250952 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Greenquist", "weight": 250909 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad", "weight": 245925 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad", "weight": 245925 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Copeland", "weight": 234415 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Whitaker", "weight": 226799 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Whitaker", "weight": 226799 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Epstein", "weight": 197059 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradley Gallo", "weight": 184654 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brady Corbet", "weight": 173307 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Majors - A Hero", "weight": 158457 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Majors - A Hero", "weight": 158457 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Wyman", "weight": 156357 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Davis", "weight": 150273 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradford Dillman", "weight": 131412 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Adamson", "weight": 113849 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Adamson", "weight": 113849 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Bradley Martin", "weight": 112103 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Bradley Martin", "weight": 112103 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Bradley Jackson", "weight": 110209 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Bradley Jackson", "weight": 110209 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Hamilton", "weight": 110039 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Hamilton", "weight": 110039 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Kane", "weight": 86923 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Halvorson", "weight": 85643 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradley James", "weight": 84478 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Cage", "weight": 82655 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Cage", "weight": 82655 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Wesley", "weight": 80561 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Wesley", "weight": 80561 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradford Lipson", "weight": 78284 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brady Noon", "weight": 77389 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Ableson", "weight": 75588 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradford Young", "weight": 75006 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Booker", "weight": 74354 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Riddell", "weight": 73922 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Gann", "weight": 73337 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradley Stonesifer", "weight": 71122 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Bradley Thomas", "weight": 71027 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Bradley Thomas", "weight": 71027 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Leland", "weight": 69259 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Lewis", "weight": 68518 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Lewis", "weight": 68518 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Gregory", "weight": 67955 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Gregory", "weight": 67955 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradley Parker", "weight": 67355 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Stand", "weight": 65526 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Stand", "weight": 65526 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad (segment \\Tape 56\\)", "weight": 65431 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad (segment \\Tape 56\\)", "weight": 65431 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brady Coleman", "weight": 61797 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Chang", "weight": 59267 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Chang", "weight": 59267 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Beyer", "weight": 57584 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Weston", "weight": 51673 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Bottig", "weight": 51451 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Bottig", "weight": 51451 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Chase", "weight": 48224 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Chase", "weight": 48224 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradley King", "weight": 47442 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Loree", "weight": 46958 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Harris", "weight": 46640 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Harris", "weight": 46640 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Morris", "weight": 46105 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Taylor", "weight": 46084 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Taylor", "weight": 46084 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradley Raymond", "weight": 43731 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradley Jenkel", "weight": 42784 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad M. Gilbert", "weight": 39777 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Baruh", "weight": 39491 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Williams", "weight": 39300 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Williams", "weight": 39300 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Bradley", "weight": 38412 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Bradley", "weight": 38412 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Brad Brown", "weight": 37424 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Brad Brown", "weight": 37424 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradford L. Schlei", "weight": 35007 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Silverman", "weight": 34490 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Simpson", "weight": 33880 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bradford May", "weight": 33729 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Brad Johnson", "weight": 31763 } ]
WITH __stage0 AS ( SELECT group_set, CASE group_set WHEN 0 THEN 'averageRating' WHEN 1 THEN 'endYear' WHEN 2 THEN 'isAdult' WHEN 3 THEN 'numVotes' WHEN 4 THEN 'originalTitle' WHEN 5 THEN 'primaryTitle' WHEN 6 THEN 'runtimeMinutes' WHEN 7 THEN 'startYear' WHEN 8 THEN 'tconst' WHEN 9 THEN 'genres.each' WHEN 10 THEN 'genres.value' WHEN 11 THEN 'principals.category' WHEN 12 THEN 'principals.job' WHEN 13 THEN 'principals.characters.each' WHEN 14 THEN 'principals.characters.value' WHEN 15 THEN 'principals.people.primaryName' END as "fieldName", CASE group_set WHEN 0 THEN 'averageRating' WHEN 1 THEN 'endYear' WHEN 2 THEN 'isAdult' WHEN 3 THEN 'numVotes' WHEN 4 THEN 'originalTitle' WHEN 5 THEN 'primaryTitle' WHEN 6 THEN 'runtimeMinutes' WHEN 7 THEN 'startYear' WHEN 8 THEN 'tconst' WHEN 9 THEN 'genres/each' WHEN 10 THEN 'genres/value' WHEN 11 THEN 'principals/category' WHEN 12 THEN 'principals/job' WHEN 13 THEN 'principals/characters/each' WHEN 14 THEN 'principals/characters/value' WHEN 15 THEN 'principals/people/primaryName' END as "fieldPath", CASE group_set WHEN 0 THEN 'number' WHEN 1 THEN 'number' WHEN 2 THEN 'string' WHEN 3 THEN 'number' WHEN 4 THEN 'string' WHEN 5 THEN 'string' WHEN 6 THEN 'number' WHEN 7 THEN 'number' WHEN 8 THEN 'string' WHEN 9 THEN 'string' WHEN 10 THEN 'string' WHEN 11 THEN 'string' WHEN 12 THEN 'string' WHEN 13 THEN 'string' WHEN 14 THEN 'string' WHEN 15 THEN 'string' END as "fieldType", CASE group_set WHEN 99999 THEN CAST(NULL as VARCHAR) WHEN 2 THEN base."isAdult" WHEN 4 THEN base."originalTitle" WHEN 5 THEN base."primaryTitle" WHEN 8 THEN base."tconst" WHEN 9 THEN genres_0 WHEN 10 THEN genres_0 WHEN 11 THEN principals_0."category" WHEN 12 THEN principals_0."job" WHEN 13 THEN characters_0 WHEN 14 THEN characters_0 WHEN 15 THEN people_0."primaryName" END as "fieldValue", COALESCE(( SELECT SUM(a.val) as value FROM ( SELECT UNNEST(list(distinct {key:base."__distinct_key", val: base."numVotes"})) a ) ),0) as "weight", CASE group_set WHEN 99999 THEN '' WHEN 0 THEN MIN(CAST(base."averageRating" as VARCHAR)) || ' to ' || CAST(MAX(base."averageRating") as VARCHAR) WHEN 1 THEN MIN(CAST(base."endYear" as VARCHAR)) || ' to ' || CAST(MAX(base."endYear") as VARCHAR) WHEN 3 THEN MIN(CAST(base."numVotes" as VARCHAR)) || ' to ' || CAST(MAX(base."numVotes") as VARCHAR) WHEN 6 THEN MIN(CAST(base."runtimeMinutes" as VARCHAR)) || ' to ' || CAST(MAX(base."runtimeMinutes") as VARCHAR) WHEN 7 THEN MIN(CAST(base."startYear" as VARCHAR)) || ' to ' || CAST(MAX(base."startYear") as VARCHAR) END as "fieldRange" FROM (SELECT GEN_RANDOM_UUID() as "__distinct_key", x.* FROM '../data/titles.parquet' as x) as base LEFT JOIN LATERAL (SELECT UNNEST(base."genres"), 1 as ignoreme) as genres_0_outer(genres_0,ignoreme) ON genres_0_outer.ignoreme=1 LEFT JOIN '../data/principals.parquet' AS principals_0 ON base."tconst"=principals_0."tconst" LEFT JOIN LATERAL (SELECT UNNEST(principals_0."characters"), 1 as ignoreme) as characters_0_outer(characters_0,ignoreme) ON characters_0_outer.ignoreme=1 LEFT JOIN '../data/names.parquet' AS people_0 ON principals_0."nconst"=people_0."nconst" CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,16,1)) as group_set ) as group_set GROUP BY 1,2,3,4,5 ) , __stage1 AS ( SELECT "fieldName", "fieldPath", "fieldType", COALESCE("fieldValue", "fieldRange") as "fieldValue", "weight" FROM __stage0 ) SELECT base."fieldName" as "fieldName", base."fieldPath" as "fieldPath", base."fieldType" as "fieldType", base."fieldValue" as "fieldValue", base."weight" as "weight" FROM __stage1 as base WHERE base."fieldValue" LIKE 'Brad%' ORDER BY 5 desc NULLS LAST
So to look for 'Bat'
run: movies2 -> search_index -> { select: * where: fieldValue ~ 'Bat%' order_by: weight desc }
[ { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Batman", "weight": 10424082 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Batman", "weight": 10424082 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman Begins", "weight": 1507854 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman Begins", "weight": 1507854 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "Batman created by", "weight": 1020989 }, { "fieldName": "principals.job", "fieldPath": "principals/job", "fieldType": "string", "fieldValue": "Batman characters", "weight": 964899 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Batty", "weight": 790162 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Batty", "weight": 790162 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman v Superman: Dawn of Justice", "weight": 713914 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman v Superman: Dawn of Justice", "weight": 713914 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman", "weight": 389337 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman", "weight": 389337 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman Returns", "weight": 315132 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman Returns", "weight": 315132 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Batou", "weight": 260600 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Batou", "weight": 260600 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman & Robin", "weight": 260430 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman & Robin", "weight": 260430 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman Forever", "weight": 259315 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman Forever", "weight": 259315 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Battleship", "weight": 252642 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Battleship", "weight": 252642 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Battlestar Galactica", "weight": 248435 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Battlestar Galactica", "weight": 248435 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Batgirl", "weight": 221004 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Batgirl", "weight": 221004 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Battle of the Bastards", "weight": 217000 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Battle of the Bastards", "weight": 217000 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batoru rowaiaru", "weight": 188692 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Battle Royale", "weight": 188692 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Battle: Los Angeles", "weight": 182720 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Battle Los Angeles", "weight": 182720 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Batô", "weight": 148975 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Batô", "weight": 148975 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Batiatus", "weight": 141304 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Batiatus", "weight": 141304 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Battera", "weight": 112813 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Battera", "weight": 112813 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Bates Motel", "weight": 112103 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Bates Motel", "weight": 112103 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman: The Animated Series", "weight": 109143 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman: The Animated Series", "weight": 109143 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Battle Droids", "weight": 107847 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Battle Droids", "weight": 107847 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Battlefield Earth", "weight": 81723 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Battlefield Earth", "weight": 81723 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bathsheba Doran", "weight": 76412 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Bate Khalil", "weight": 73385 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman v Superman: Dawn of Justice (Ultimate Edition)", "weight": 68865 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman v Superman: Dawn of Justice (Ultimate Edition)", "weight": 68865 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman: Under the Red Hood", "weight": 65192 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman: Under the Red Hood", "weight": 65192 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman: The Dark Knight Returns, Part 1", "weight": 61312 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman: The Dark Knight Returns, Part 1", "weight": 61312 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Battleship Potemkin", "weight": 59629 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman: The Killing Joke", "weight": 59389 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman: The Killing Joke", "weight": 59389 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Battle of the Sexes", "weight": 57648 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Battle of the Sexes", "weight": 57648 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Bathurst 2020", "weight": 55709 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Bathurst 2020", "weight": 55709 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman: The Dark Knight Returns, Part 2", "weight": 53960 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman: The Dark Knight Returns, Part 2", "weight": 53960 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman: Mask of the Phantasm", "weight": 53743 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman: Mask of the Phantasm", "weight": 53743 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Batu", "weight": 51728 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Batu", "weight": 51728 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Bathsheba Everdene", "weight": 51291 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Bathsheba Everdene", "weight": 51291 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batwoman", "weight": 45279 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Batwoman", "weight": 45279 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Batwoman", "weight": 45279 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batwoman", "weight": 45279 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Batwing", "weight": 45279 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Batwing", "weight": 45279 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Batukeshwar 'Bittu' Tiwari", "weight": 41416 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Batukeshwar 'Bittu' Tiwari", "weight": 41416 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman: Year One", "weight": 36891 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman: Year One", "weight": 36891 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman: Assault on Arkham", "weight": 36460 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman: Assault on Arkham", "weight": 36460 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Battal", "weight": 35502 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Battal", "weight": 35502 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman: The Movie", "weight": 34606 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman: The Movie", "weight": 34606 }, { "fieldName": "principals.people.primaryName", "fieldPath": "principals/people/primaryName", "fieldType": "string", "fieldValue": "Battle Davis", "weight": 33046 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Bathroom Attendant", "weight": 33012 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Bathroom Attendant", "weight": 33012 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Batman: Arkham City", "weight": 32852 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Batman: Arkham City", "weight": 32852 }, { "fieldName": "originalTitle", "fieldPath": "originalTitle", "fieldType": "string", "fieldValue": "Battle for the Planet of the Apes", "weight": 32287 }, { "fieldName": "primaryTitle", "fieldPath": "primaryTitle", "fieldType": "string", "fieldValue": "Battle for the Planet of the Apes", "weight": 32287 }, { "fieldName": "principals.characters.each", "fieldPath": "principals/characters/each", "fieldType": "string", "fieldValue": "Batty Koda", "weight": 32170 }, { "fieldName": "principals.characters.value", "fieldPath": "principals/characters/value", "fieldType": "string", "fieldValue": "Batty Koda", "weight": 32170 } ]
WITH __stage0 AS ( SELECT group_set, CASE group_set WHEN 0 THEN 'averageRating' WHEN 1 THEN 'endYear' WHEN 2 THEN 'isAdult' WHEN 3 THEN 'numVotes' WHEN 4 THEN 'originalTitle' WHEN 5 THEN 'primaryTitle' WHEN 6 THEN 'runtimeMinutes' WHEN 7 THEN 'startYear' WHEN 8 THEN 'tconst' WHEN 9 THEN 'genres.each' WHEN 10 THEN 'genres.value' WHEN 11 THEN 'principals.category' WHEN 12 THEN 'principals.job' WHEN 13 THEN 'principals.characters.each' WHEN 14 THEN 'principals.characters.value' WHEN 15 THEN 'principals.people.primaryName' END as "fieldName", CASE group_set WHEN 0 THEN 'averageRating' WHEN 1 THEN 'endYear' WHEN 2 THEN 'isAdult' WHEN 3 THEN 'numVotes' WHEN 4 THEN 'originalTitle' WHEN 5 THEN 'primaryTitle' WHEN 6 THEN 'runtimeMinutes' WHEN 7 THEN 'startYear' WHEN 8 THEN 'tconst' WHEN 9 THEN 'genres/each' WHEN 10 THEN 'genres/value' WHEN 11 THEN 'principals/category' WHEN 12 THEN 'principals/job' WHEN 13 THEN 'principals/characters/each' WHEN 14 THEN 'principals/characters/value' WHEN 15 THEN 'principals/people/primaryName' END as "fieldPath", CASE group_set WHEN 0 THEN 'number' WHEN 1 THEN 'number' WHEN 2 THEN 'string' WHEN 3 THEN 'number' WHEN 4 THEN 'string' WHEN 5 THEN 'string' WHEN 6 THEN 'number' WHEN 7 THEN 'number' WHEN 8 THEN 'string' WHEN 9 THEN 'string' WHEN 10 THEN 'string' WHEN 11 THEN 'string' WHEN 12 THEN 'string' WHEN 13 THEN 'string' WHEN 14 THEN 'string' WHEN 15 THEN 'string' END as "fieldType", CASE group_set WHEN 99999 THEN CAST(NULL as VARCHAR) WHEN 2 THEN base."isAdult" WHEN 4 THEN base."originalTitle" WHEN 5 THEN base."primaryTitle" WHEN 8 THEN base."tconst" WHEN 9 THEN genres_0 WHEN 10 THEN genres_0 WHEN 11 THEN principals_0."category" WHEN 12 THEN principals_0."job" WHEN 13 THEN characters_0 WHEN 14 THEN characters_0 WHEN 15 THEN people_0."primaryName" END as "fieldValue", COALESCE(( SELECT SUM(a.val) as value FROM ( SELECT UNNEST(list(distinct {key:base."__distinct_key", val: base."numVotes"})) a ) ),0) as "weight", CASE group_set WHEN 99999 THEN '' WHEN 0 THEN MIN(CAST(base."averageRating" as VARCHAR)) || ' to ' || CAST(MAX(base."averageRating") as VARCHAR) WHEN 1 THEN MIN(CAST(base."endYear" as VARCHAR)) || ' to ' || CAST(MAX(base."endYear") as VARCHAR) WHEN 3 THEN MIN(CAST(base."numVotes" as VARCHAR)) || ' to ' || CAST(MAX(base."numVotes") as VARCHAR) WHEN 6 THEN MIN(CAST(base."runtimeMinutes" as VARCHAR)) || ' to ' || CAST(MAX(base."runtimeMinutes") as VARCHAR) WHEN 7 THEN MIN(CAST(base."startYear" as VARCHAR)) || ' to ' || CAST(MAX(base."startYear") as VARCHAR) END as "fieldRange" FROM (SELECT GEN_RANDOM_UUID() as "__distinct_key", x.* FROM '../data/titles.parquet' as x) as base LEFT JOIN LATERAL (SELECT UNNEST(base."genres"), 1 as ignoreme) as genres_0_outer(genres_0,ignoreme) ON genres_0_outer.ignoreme=1 LEFT JOIN '../data/principals.parquet' AS principals_0 ON base."tconst"=principals_0."tconst" LEFT JOIN LATERAL (SELECT UNNEST(principals_0."characters"), 1 as ignoreme) as characters_0_outer(characters_0,ignoreme) ON characters_0_outer.ignoreme=1 LEFT JOIN '../data/names.parquet' AS people_0 ON principals_0."nconst"=people_0."nconst" CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,16,1)) as group_set ) as group_set GROUP BY 1,2,3,4,5 ) , __stage1 AS ( SELECT "fieldName", "fieldPath", "fieldType", COALESCE("fieldValue", "fieldRange") as "fieldValue", "weight" FROM __stage0 ) SELECT base."fieldName" as "fieldName", base."fieldPath" as "fieldPath", base."fieldType" as "fieldType", base."fieldValue" as "fieldValue", base."weight" as "weight" FROM __stage1 as base WHERE base."fieldValue" LIKE 'Bat%' ORDER BY 5 desc NULLS LAST