本文以select * from (values (1,1),(2,2)) as foo;
为例,分析一下ValueScan其源码执行过程。
语法解析层
Select SQL语句字符串输入到数据库后,会首先在语法解析层表示成抽象语法树SelectStmt
,进而经过语义分析,转换为查询树Query
。 将values的信息保存在了valuesLists字段中。
exec_simple_query
--> pg_parse_query--> raw_parser--> base_yyparse
--> pg_analyze_and_rewrite
核心结构体
typedef struct SelectStmt
{NodeTag type;/* These fields are used only in "leaf" SelectStmts.*/List *distinctClause; /* NULL, list of DISTINCT ON exprs, or* lcons(NIL,NIL) for all (SELECT DISTINCT) */IntoClause *intoClause; /* target for SELECT INTO */List *targetList; /* the target list (of ResTarget) */List *fromClause; /* the FROM clause */Node *whereClause; /* WHERE qualification */List *groupClause; /* GROUP BY clauses */Node *havingClause; /* HAVING conditional-expression */List *windowClause; /* WINDOW window_name AS (...), ... *//** In a "leaf" node representing a VALUES list, the above fields are all* null, and instead this field is set. Note that the elements of the* sublists are just expressions, without ResTarget decoration. Also note* that a list element can be DEFAULT (represented as a SetToDefault* node), regardless of the context of the VALUES list. It's up to parse* analysis to reject that where not valid.*/List *valuesLists; /* untransformed list of expression lists *//* These fields are used in both "leaf" SelectStmts and upper-level SelectStmts.*/List *sortClause; /* sort clause (a list of SortBy's) */Node *limitOffset; /* # of result tuples to skip */Node *limitCount; /* # of result tuples to return */LimitOption limitOption; /* limit type */List *lockingClause; /* FOR UPDATE (list of LockingClause's) */WithClause *withClause; /* WITH clause *//* These fields are used only in upper-level SelectStmts.*/SetOperation op; /* type of set op */bool all; /* ALL specified? */struct SelectStmt *larg; /* left child */struct SelectStmt *rarg; /* right child */
} SelectStmt;/* RangeSubselect - subquery appearing in a FROM clause */
typedef struct RangeSubselect
{NodeTag type;bool lateral; /* does it have LATERAL prefix? */Node *subquery; /* the untransformed sub-select clause */Alias *alias; /* table alias & optional column aliases */
} RangeSubselect;
gram.y中语法定义分析:
-- 表示的 from () as foo
from_clause:FROM from_list { $$ = $2; }| /*EMPTY*/ { $$ = NIL; };from_list:table_ref { $$ = list_make1($1); }| from_list ',' table_ref { $$ = lappend($1, $3); };/** table_ref is where an alias clause can be attached.*/
table_ref: relation_expr opt_alias_clause{$1->alias = $2;$$ = (Node *) $1;}| select_with_parens opt_alias_clause{ -- 将values 表示为了子查询 RangeSubselect *n = makeNode(RangeSubselect);n->lateral = false;n->subquery = $1;n->alias = $2;if ($2 == NULL){if (IsA($1, SelectStmt) &&((SelectStmt *) $1)->valuesLists)ereport(ERROR,(errcode(ERRCODE_SYNTAX_ERROR),errmsg("VALUES in FROM must have an alias"),errhint("For example, FROM (VALUES ...) [AS] foo."),parser_errposition(@1)));}$$ = (Node *) n;}select_with_parens:'(' select_no_parens ')' { $$ = $2; }select_no_parens:simple_select { $$ = $1; }simple_select:SELECT opt_all_clause opt_target_listinto_clause from_clause where_clausegroup_clause having_clause window_clause{SelectStmt *n = makeNode(SelectStmt);n->targetList = $3;n->intoClause = $4;n->fromClause = $5;n->whereClause = $6;n->groupClause = ($7)->list;n->havingClause = $8;n->windowClause = $9;$$ = (Node *)n;}| values_clause { $$ = $1; }-- 表示 values ( (1,1), (2,2))
values_clause:VALUES '(' expr_list ')'{SelectStmt *n = makeNode(SelectStmt);n->valuesLists = list_make1($3);$$ = (Node *) n;}| values_clause ',' '(' expr_list ')'{SelectStmt *n = (SelectStmt *) $1;n->valuesLists = lappend(n->valuesLists, $4);$$ = (Node *) n;};
最终表示为:
// select * from Subquery, Subquery is (Values (1,1),(2,2)) as foo;
SelectStmt (top-sql)
{targetList : '*'fromClause : RangeSubselect
}
// 子查询范围表
RangeSubselect
{subquery: SelectStmt (Subquery is (Values (1,1),(2,2)))alias: as foo
}
// 子查询
SelectStmt
{valuesLists: `(Values (1,1),(2,2))`
}
语义分析
将上面的抽象语法树SelectStmt
转换为查询树Query
。检查语义是否合法。流程如下:
pg_analyze_and_rewrite
--> pg_analyze--> transformStmt--> transformSelectStmt--> transformFromClause // 处理子查询范围表--> transformFromClauseItem--> transformRangeSubselect--> parse_sub_analyze // 处理子查询,生成子查询的查询树Query--> transformStmt--> transformValuesClause // 处理子查询中的values表达式--> transformExpressionList--> transformExpr--> addRangeTableEntryForValues--> addRangeTableEntryForSubquery //将values expr list 放到rte->values_lists字段中--> transformTargetList
--> pg_rewrite_query
关键数据结构:
// 范围表类型
typedef enum RTEKind
{RTE_RELATION, /* ordinary relation reference */RTE_SUBQUERY, /* subquery in FROM */RTE_JOIN, /* join */RTE_FUNCTION, /* function in FROM */RTE_TABLEFUNC, /* TableFunc(.., column list) */RTE_VALUES, /* VALUES (<exprlist>), (<exprlist>), ... */RTE_CTE, /* common table expr (WITH list element) */RTE_NAMEDTUPLESTORE, /* tuplestore, e.g. for AFTER triggers */RTE_RESULT /* RTE represents an empty FROM clause; such* RTEs are added by the planner, they're not* present during parsing or rewriting */
} RTEKind;
// 范围表
typedef struct RangeTblEntry
{NodeTag type;RTEKind rtekind; /* see above *//** Fields valid for a plain relation RTE (else zero):*/Oid relid; /* OID of the relation */char relkind; /* relation kind (see pg_class.relkind) */int rellockmode; /* lock level that query requires on the rel */struct TableSampleClause *tablesample; /* sampling info, or NULL *//** Fields valid for a subquery RTE (else NULL):*/Query *subquery; /* the sub-query */bool security_barrier; /* is from security_barrier view? *//** Fields valid for a join RTE (else NULL/zero):*/JoinType jointype; /* type of join */int joinmergedcols; /* number of merged (JOIN USING) columns */List *joinaliasvars; /* list of alias-var expansions */List *joinleftcols; /* left-side input column numbers */List *joinrightcols; /* right-side input column numbers *//** Fields valid for a function RTE (else NIL/zero):*/List *functions; /* list of RangeTblFunction nodes */bool funcordinality; /* is this called WITH ORDINALITY? *//** Fields valid for a TableFunc RTE (else NULL):*/TableFunc *tablefunc;/** Fields valid for a values RTE (else NIL):*/List *values_lists; /* list of expression lists */ // Values表达式列表/** Fields valid for a CTE RTE (else NULL/zero):*/char *ctename; /* name of the WITH list item */Index ctelevelsup; /* number of query levels up */bool self_reference; /* is this a recursive self-reference? *//** Fields valid for CTE, VALUES, ENR, and TableFunc RTEs (else NIL):*/List *coltypes; /* OID list of column type OIDs */List *coltypmods; /* integer list of column typmods */List *colcollations; /* OID list of column collation OIDs *//** Fields valid for ENR RTEs (else NULL/zero):*/char *enrname; /* name of ephemeral named relation */double enrtuples; /* estimated or actual from caller *//** Fields valid in all RTEs:*/Alias *alias; /* user-written alias clause, if any */Alias *eref; /* expanded reference names */bool lateral; /* subquery, function, or values is LATERAL? */bool inh; /* inheritance requested? */bool inFromCl; /* present in FROM clause? */AclMode requiredPerms; /* bitmask of required access permissions */Oid checkAsUser; /* if valid, check access as this role */Bitmapset *selectedCols; /* columns needing SELECT permission */Bitmapset *insertedCols; /* columns needing INSERT permission */Bitmapset *updatedCols; /* columns needing UPDATE permission */Bitmapset *extraUpdatedCols; /* generated columns being updated */List *securityQuals; /* security barrier quals to apply, if any */const char *qb_name;
} RangeTblEntry;
关键处理逻辑:
// 子查询
SelectStmt
{valuesLists: `(Values (1,1),(2,2))`
}
会将valuesLists转为RangeTblEntry
,相关信息保存到values_list字段中。子查询SelectStmt
转为子查询树Query
。
// 子查询范围表
RangeSubselect
{subquery: SelectStmt (Subquery is (Values (1,1),(2,2)))alias: as foo
}
RangeSubselect
转为父查询的RangeTblEntry
,
// select * from Subquery, Subquery is (Values (1,1),(2,2)) as foo;
SelectStmt (top-sql)
{targetList : '*'fromClause : RangeSubselect
}
SelectStmt
转为top查询树Query
优化器
通过查询树生成最优路径,再生成最优执行计划。
优化器主流程如下:
pg_plan_queries
--> pg_plan_query--> planner--> standard_planner--> subquery_planner--> pull_up_subqueries // 上拉子查询--> grouping_planner--> query_planner--> setup_simple_rel_arrays--> add_base_rels_to_query--> build_simple_rel--> make_one_rel--> set_base_rel_sizes--> set_rel_size--> set_values_size_estimates--> set_baserel_size_estimates--> set_base_rel_pathlists--> set_rel_pathlist--> set_values_pathlist--> create_valuesscan_path // 生成ValueScanPath--> make_rel_from_joinlist--> apply_scanjoin_target_to_paths--> create_plan--> create_scan_plan--> create_valuesscan_plan
重点函数分析:最重要的是看一下VauleScan执行计划的生成
/** create_valuesscan_plan* Returns a valuesscan plan for the base relation scanned by 'best_path'* with restriction clauses 'scan_clauses' and targetlist 'tlist'.*/
ValuesScan *
create_valuesscan_plan(PlannerInfo *root, Path *best_path, List *tlist, List *scan_clauses)
{ValuesScan *scan_plan;Index scan_relid = best_path->parent->relid;RangeTblEntry *rte;List *values_lists;rte = planner_rt_fetch(scan_relid, root);values_lists = rte->values_lists; //存放在RangeTblEntry中的values_lists/* Sort clauses into best execution order */scan_clauses = order_qual_clauses(root, scan_clauses);/* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */scan_clauses = extract_actual_clauses(scan_clauses, false);/* Replace any outer-relation variables with nestloop params */if (best_path->param_info){scan_clauses = (List *)replace_nestloop_params(root, (Node *) scan_clauses);/* The values lists could contain nestloop params, too */values_lists = (List *)replace_nestloop_params(root, (Node *) values_lists);}scan_plan = make_valuesscan(tlist, scan_clauses, scan_relid, values_lists);copy_generic_path_info(&scan_plan->scan.plan, best_path);return scan_plan;
}ValuesScan *
make_valuesscan(List *qptlist,List *qpqual,Index scanrelid,List *values_lists)
{ValuesScan *node = makeNode(ValuesScan);Plan *plan = &node->scan.plan;plan->targetlist = qptlist;plan->qual = qpqual;plan->lefttree = NULL;plan->righttree = NULL;node->scan.scanrelid = scanrelid;node->values_lists = values_lists; // values expr listreturn node;
}
最终的执行计划:
postgres@postgres=# explain select * from (values (1,1), (2,2)) as foo;QUERY PLAN
-------------------------------------------------------------Values Scan on "*VALUES*" (cost=0.00..0.03 rows=2 width=8)
(1 row)
执行器
输入执行计划,输出最终结果。
PortalStart
--> ExecutorStart--> InitPlan--> ExecInitValuesScan
PortalRun
--> ExecutorRun--> ExecutePlan--> ExecValuesScan // 从这里开始,ExecValuesScan、ExecScan、ExecScanFetch、ValuesNext都要读懂--> ExecScan--> ExecScanFetch--> ValuesNext // 从 values expr list中获取值 转为tuple
PortalDrop
执行器算子实现源码如下:因为这几个算子都是比较重要且高频的算子,所以我们把他的源码全部列了出来,要把整个都读懂才行。
/* ----------------------------------------------------------------* ExecValuesScan(node)** Scans the values lists sequentially and returns the next qualifying tuple.* We call the ExecScan() routine and pass it the appropriate access method functions.* ----------------------------------------------------------------*/
TupleTableSlot *ExecValuesScan(PlanState *pstate)
{ValuesScanState *node = castNode(ValuesScanState, pstate);return ExecScan(&node->ss,(ExecScanAccessMtd) ValuesNext, // 这个参数最重要,access method实现(ExecScanRecheckMtd) ValuesRecheck);
}/* ----------------------------------------------------------------* ExecScan** Scans the relation using the 'access method' indicated and* returns the next qualifying tuple.* The access method returns the next tuple and ExecScan() is* responsible for checking the tuple returned against the qual-clause.** A 'recheck method' must also be provided that can check an* arbitrary tuple of the relation against any qual conditions* that are implemented internal to the access method.** Conditions:* -- the "cursor" maintained by the AMI is positioned at the tuple* returned previously.** Initial States:* -- the relation indicated is opened for scanning so that the* "cursor" is positioned before the first qualifying tuple.* ----------------------------------------------------------------*/
TupleTableSlot *
ExecScan(ScanState *node, ExecScanAccessMtd accessMtd, ExecScanRecheckMtd recheckMtd)
{ExprContext *econtext;ExprState *qual;ProjectionInfo *projInfo;/* Fetch data from node */qual = node->ps.qual;projInfo = node->ps.ps_ProjInfo;econtext = node->ps.ps_ExprContext;/* If we have neither a qual to check nor a projection to do, just skip* all the overhead and return the raw scan tuple.*/if (!qual && !projInfo){ResetExprContext(econtext);return ExecScanFetch(node, accessMtd, recheckMtd);}/* Reset per-tuple memory context to free any expression evaluation* storage allocated in the previous tuple cycle.*/ResetExprContext(econtext);/* get a tuple from the access method. Loop until we obtain a tuple that* passes the qualification. */for (;;){TupleTableSlot *slot;slot = ExecScanFetch(node, accessMtd, recheckMtd);/* if the slot returned by the accessMtd contains NULL, then it means* there is nothing more to scan so we just return an empty slot,* being careful to use the projection result slot so it has correct tupleDesc. */if (TupIsNull(slot)){if (projInfo)return ExecClearTuple(projInfo->pi_state.resultslot);elsereturn slot;}/* place the current tuple into the expr context */econtext->ecxt_scantuple = slot;/* check that the current tuple satisfies the qual-clause** check for non-null qual here to avoid a function call to ExecQual()* when the qual is null ... saves only a few cycles, but they add up ... */if (qual == NULL || ExecQual(qual, econtext)){/* Found a satisfactory scan tuple. */if (projInfo){/* Form a projection tuple, store it in the result tuple slot and return it. */return ExecProject(projInfo);} else{return slot; /* Here, we aren't projecting, so just return scan tuple. */}}elseInstrCountFiltered1(node, 1);/* Tuple fails qual, so free per-tuple memory and try again. */ResetExprContext(econtext);}
}/* ----------------------------------------------------------------* ValuesNext** This is a workhorse for ExecValuesScan* ----------------------------------------------------------------*/
TupleTableSlot *ValuesNext(ValuesScanState *node)
{TupleTableSlot *slot;EState *estate;ExprContext *econtext;ScanDirection direction;int curr_idx;/* get information from the estate and scan state */estate = node->ss.ps.state;direction = estate->es_direction;slot = node->ss.ss_ScanTupleSlot;econtext = node->rowcontext;/* Get the next tuple. Return NULL if no more tuples. */if (ScanDirectionIsForward(direction)){if (node->curr_idx < node->array_len)node->curr_idx++;} else{if (node->curr_idx >= 0)node->curr_idx--;}/* Always clear the result slot; this is appropriate if we are at the end* of the data, and if we're not, we still need it as the first step of* the store-virtual-tuple protocol. It seems wise to clear the slot* before we reset the context it might have pointers into. */ExecClearTuple(slot);curr_idx = node->curr_idx;if (curr_idx >= 0 && curr_idx < node->array_len){List *exprlist = node->exprlists[curr_idx];List *exprstatelist = node->exprstatelists[curr_idx];MemoryContext oldContext;Datum *values;bool *isnull;ListCell *lc;int resind;/* Get rid of any prior cycle's leftovers. We use ReScanExprContext* not just ResetExprContext because we want any registered shutdown callbacks to be called. */ReScanExprContext(econtext);/* Do per-VALUES-row work in the per-tuple context.*/oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);/** Unless we already made the expression eval state for this row,* build it in the econtext's per-tuple memory. This is a tad* unusual, but we want to delete the eval state again when we move to* the next row, to avoid growth of memory requirements over a long* values list. For rows in which that won't work, we already built* the eval state at plan startup.*/if (exprstatelist == NIL){/** Pass parent as NULL, not my plan node, because we don't want* anything in this transient state linking into permanent state.* The only expression type that might wish to do so is a SubPlan,* and we already checked that there aren't any.** Note that passing parent = NULL also disables JIT compilation* of the expressions, which is a win, because they're only going* to be used once under normal circumstances.*/exprstatelist = ExecInitExprList(exprlist, NULL);}/** Compute the expressions and build a virtual result tuple. We* already did ExecClearTuple(slot).*/values = slot->tts_values;isnull = slot->tts_isnull;resind = 0;foreach(lc, exprstatelist){ExprState *estate = (ExprState *) lfirst(lc);Form_pg_attribute attr = TupleDescAttr(slot->tts_tupleDescriptor, resind);values[resind] = ExecEvalExpr(estate, econtext, &isnull[resind]);/** We must force any R/W expanded datums to read-only state, in* case they are multiply referenced in the plan node's output* expressions, or in case we skip the output projection and the* output column is multiply referenced in higher plan nodes.*/values[resind] = MakeExpandedObjectReadOnly(values[resind],isnull[resind],attr->attlen);resind++;}MemoryContextSwitchTo(oldContext);ExecStoreVirtualTuple(slot); /* And return the virtual tuple. */}return slot;
}