当前位置：首页 > article >正文

Postgresql源码（142）子查询提升pull_up_sublinks

article 2025/3/29 8:37:46

1 案例

drop table t_fun01;
create table t_fun01 (image_id numeric primary key, content_id varchar(50), file_code varchar(20));
create index idx3 on t_fun01(content_id);
create index idx4 on t_fun01(file_code);
insert into t_fun01 select t.i, t.i%10, t.i%100 from generate_series(1,1000000) t(i);
analyze t_fun01;
explain analyze update t_fun01 t set content_id = 'null' where t.image_id in (select max(image_id) from t_fun01 where file_code = '1');

从计划上看，nestloop挂子查询Index Scan Backward确实是能想到的最优的计划：

postgres=# explain analyze update t_fun01 t set content_id = 'null' where t.image_id in (select max(image_id) from t_fun01 where file_code = '1');
                                                                              QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Update on t_fun01 t  (cost=4.69..12.73 rows=0 width=0) (actual time=0.257..0.259 rows=0 loops=1)
   ->  Nested Loop  (cost=4.69..12.73 rows=1 width=180) (actual time=0.135..0.138 rows=1 loops=1)
         ->  Subquery Scan on "ANY_subquery"  (cost=4.27..4.29 rows=1 width=88) (actual time=0.117..0.119 rows=1 loops=1)
               ->  Result  (cost=4.27..4.28 rows=1 width=32) (actual time=0.112..0.113 rows=1 loops=1)
                     InitPlan 1 (returns $0)
                       ->  Limit  (cost=0.42..4.27 rows=1 width=6) (actual time=0.107..0.108 rows=1 loops=1)
                             ->  Index Scan Backward using t_fun01_pkey on t_fun01  (cost=0.42..36389.43 rows=9467 width=6) (actual time=0.106..0.106 rows=1 loops=1)
                                   Index Cond: (image_id IS NOT NULL)
                                   Filter: ((file_code)::text = '1'::text)
                                   Rows Removed by Filter: 99
         ->  Index Scan using t_fun01_pkey on t_fun01 t  (cost=0.42..8.44 rows=1 width=12) (actual time=0.014..0.014 rows=1 loops=1)
               Index Cond: (image_id = "ANY_subquery".max)
 Planning Time: 0.532 ms
 Execution Time: 0.345 ms
(14 rows

优化器是如何上拉子查询的呢？

2 上拉子查询流程分析

2.1 pull_up_sublinks

pull_up_sublinks 是优化器中用于子链接（SubLink）上拉优化的核心函数。
通过递归处理查询树中的子链接（如 ANY、EXISTS），将其转换为**半连接（Semi Join）或反半连接（Anti Join）**结构，从而提升查询性能。

在当前案例中，pull_up_sublinks函数拿到的Query树如下：
在这里插入图片描述
pull_up_sublinks递归入口：

调用 pull_up_sublinks_jointree_recurse 遍历 jointree，处理 RangeTblRef（单表）、FromExpr（FROM列表）和 JoinExpr（JOIN操作）中的子链接。
调用 pull_up_sublinks_qual_recurse 处理条件表达式中的子链接，如 WHERE col = ANY (SELECT ...)。

子链接转换逻辑：

ANY 子链接：通过 convert_ANY_sublink_to_join 转换为 Semi Join，生成新的连接条件（如 col = t2.key）
EXISTS 子链接：通过 convert_EXISTS_sublink_to_join 转换为 Anti Join，优化为外连接形式

2.2 convert_ANY_sublink_to_join

(gdb) bt
#0  convert_ANY_sublink_to_join (root=0x2b64f60, sublink=0x2a6f848, available_rels=0x2b653c0) at subselect.c:1272
#1  0x00000000008a1606 in pull_up_sublinks_qual_recurse (root=0x2b64f60, node=0x2a6f848, jtlink1=0x7ffd2e7e8318, available_rels1=0x2b653c0, jtlink2=0x0, available_rels2=0x0)
    at prepjointree.c:508
#2  0x00000000008a12e1 in pull_up_sublinks_jointree_recurse (root=0x2b64f60, jtnode=0x2b64cb0, relids=0x7ffd2e7e8380) at prepjointree.c:379
#3  0x00000000008a10e4 in pull_up_sublinks (root=0x2b64f60) at prepjointree.c:315
#4  0x0000000000886ad1 in subquery_planner (glob=0x2a6f968, parse=0x2a6fa78, parent_root=0x0, hasRecursion=false, tuple_fraction=0) at planner.c:689
#5  0x0000000000885e98 in standard_planner (parse=0x2a6fa78,
    query_string=0x2a6e538 "update t_fun01 t set content_id = 'null' where t.image_id in (select max(image_id) from t_fun01 where file_code = '1');", cursorOptions=2048,
    boundParams=0x0) at planner.c:413

进入convert_ANY_sublink_to_join时available_rels=[1,0]指向t_image，拿到的sublink：
在这里插入图片描述
convert_ANY_sublink_to_join

输入：调用者已在查询的某个条件子句（qual clause）顶层发现了一个 ANY 子链接，但尚未进一步检查该子链接的属性。
功能：判断是否适合以连接方式处理此子链接。若适合，构造并返回一个 JoinExpr 结构；否则返回 NULL（表示无法转换）。
关键限制参数：available_rels
- 含义：允许在子链接表达式中安全引用的父查询关系集合（即表或视图的标识集合）。
- 作用：避免当子链接出现在外连接（Outer Join）的 ON 条件中时，因引用非法关系而改变查询语义。
- 强制要求：若转换后的条件引用了 available_rels 之外的父查询关系，则转换必须失败。

JoinExpr *
convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
							Relids available_rels)

第一步判断：pull_varnos拿到子查询里面引用了那些level=0的基表，这里upper_varnos只有一个元素1。因为只用了一个RTE：t_fun01

	upper_varnos = pull_varnos(root, sublink->testexpr);
	if (bms_is_empty(upper_varnos))
		return NULL;

第二步判断：不能引用available_rels之外的表。

	if (!bms_is_subset(upper_varnos, available_rels))
		return NULL;

第三步判断：不能带v类型的函数。

	if (contain_volatile_functions(sublink->testexpr))
		return NULL;

到这里就说明可以转，开始拼join，增加一个RTE叫做ANY_subquery，所以可以在计划里面看到这个名字。

	pstate = make_parsestate(NULL);
	nsitem = addRangeTableEntryForSubquery(pstate,
										   subselect,
										   makeAlias("ANY_subquery", NIL),
										   false,
										   false);
	rte = nsitem->p_rte;
	parse->rtable = lappend(parse->rtable, rte);
	rtindex = list_length(parse->rtable);
	rtr = makeNode(RangeTblRef);
	rtr->rtindex = rtindex;
	subquery_vars = generate_subquery_vars(root,
										   subselect->targetList,
										   rtindex);
	quals = convert_testexpr(root, sublink->testexpr, subquery_vars);
	result = makeNode(JoinExpr);
	result->jointype = JOIN_SEMI;
	result->isNatural = false;
	result->larg = NULL;		/* caller must fill this in */
	result->rarg = (Node *) rtr;
	result->usingClause = NIL;
	result->join_using_alias = NULL;
	result->quals = quals;
	result->alias = NULL;
	result->rtindex = 0;
	return result;

3 pull_varnos

功能

工具函数，其核心作用是从语法树中提取所有引用“零层级”基表RTE的变量编号varnos集合。
varno：变量编号，唯一标识查询中引用的基表（如 FROM 后的表、子查询、连接结果等）。
零层级（level-zero）：指当前查询层级的基表（不包含嵌套子查询中的表）。
nullingrels：外连接中可能导致列值为 NULL 的基表集合（如 LEFT JOIN 的右表）。

工作方式：

query_or_expression_tree_walker递归遍历语法树，调用 pull_varnos_walker 处理每个节点
Var 节点：提取其 varno 和 varnullingrels（外连接空值关系）。
PlaceHolderVar 节点：若 root 非空，提取其 phnullingrels。
SubLink 节点：递归处理子链接内的表达式，捕捉上层的 varno 引用。
SubPlan 节点：仅处理传递给子计划的参数（子计划内部已独立优化）。

层级控制

sublevels_up 计数器：跟踪嵌套子查询的层级。
当进入子查询时，计数器递增，仅收集当前层级（零层级）的 varno。
嵌套子查询中的 varno 会被忽略（因其属于深层级）。

Relids
pull_varnos(PlannerInfo *root, Node *node)
{
	pull_varnos_context context;

	context.varnos = NULL;
	context.root = root;
	context.sublevels_up = 0;

	/*
	 * Must be prepared to start with a Query or a bare expression tree; if
	 * it's a Query, we don't want to increment sublevels_up.
	 */
	query_or_expression_tree_walker(node,
									pull_varnos_walker,
									(void *) &context,
									0);

	return context.varnos;
}

查看全文

http://www.kler.cn/a/599130.html