千家信息网

PostgreSQL中query_planner中主计划函数make_one_rel的主实现逻辑是什么

发表于:2025-11-09 作者:千家信息网编辑
千家信息网最后更新 2025年11月09日,这篇文章主要介绍"PostgreSQL中query_planner中主计划函数make_one_rel的主实现逻辑是什么",在日常操作中,相信很多人在PostgreSQL中query_planner中
千家信息网最后更新 2025年11月09日PostgreSQL中query_planner中主计划函数make_one_rel的主实现逻辑是什么

这篇文章主要介绍"PostgreSQL中query_planner中主计划函数make_one_rel的主实现逻辑是什么",在日常操作中,相信很多人在PostgreSQL中query_planner中主计划函数make_one_rel的主实现逻辑是什么问题上存在疑惑,小编查阅了各式资料,整理出简单好用的操作方法,希望对大家解答"PostgreSQL中query_planner中主计划函数make_one_rel的主实现逻辑是什么"的疑惑有所帮助!接下来,请跟着小编一起来学习吧!

query_planner代码片段:

     //...     /*      * Ready to do the primary planning.      */     final_rel = make_one_rel(root, joinlist);//执行主要的计划过程      /* Check that we got at least one usable path */     if (!final_rel || !final_rel->cheapest_total_path ||         final_rel->cheapest_total_path->param_info != NULL)         elog(ERROR, "failed to construct the join relation");//检查      return final_rel;//返回结果     //...

一、数据结构

RelOptInfo

 typedef struct RelOptInfo {     NodeTag     type;//节点标识      RelOptKind  reloptkind;//RelOpt类型      /* all relations included in this RelOptInfo */     Relids      relids;         /*Relids(rtindex)集合 set of base relids (rangetable indexes) */      /* size estimates generated by planner */     double      rows;           /*结果元组的估算数量 estimated number of result tuples */      /* per-relation planner control flags */     bool        consider_startup;   /*是否考虑启动成本?是,需要保留启动成本低的路径 keep cheap-startup-cost paths? */     bool        consider_param_startup; /*是否考虑参数化?的路径 ditto, for parameterized paths? */     bool        consider_parallel;  /*是否考虑并行处理路径 consider parallel paths? */      /* default result targetlist for Paths scanning this relation */     struct PathTarget *reltarget;   /*扫描该Relation时默认的结果 list of Vars/Exprs, cost, width */      /* materialization information */     List       *pathlist;       /*访问路径链表 Path structures */     List       *ppilist;        /*路径链表中使用参数化路径进行 ParamPathInfos used in pathlist */     List       *partial_pathlist;   /* partial Paths */     struct Path *cheapest_startup_path;//代价最低的启动路径     struct Path *cheapest_total_path;//代价最低的整体路径     struct Path *cheapest_unique_path;//代价最低的获取唯一值的路径     List       *cheapest_parameterized_paths;//代价最低的参数化?路径链表      /* parameterization information needed for both base rels and join rels */     /* (see also lateral_vars and lateral_referencers) */     Relids      direct_lateral_relids;  /*使用lateral语法,需依赖的Relids rels directly laterally referenced */     Relids      lateral_relids; /* minimum parameterization of rel */      /* information about a base rel (not set for join rels!) */     //reloptkind=RELOPT_BASEREL时使用的数据结构     Index       relid;          /* Relation ID */     Oid         reltablespace;  /* 表空间 containing tablespace */     RTEKind     rtekind;        /* 基表?子查询?还是函数等等?RELATION, SUBQUERY, FUNCTION, etc */     AttrNumber  min_attr;       /* 最小的属性编号 smallest attrno of rel (often <0) */     AttrNumber  max_attr;       /* 最大的属性编号 largest attrno of rel */     Relids     *attr_needed;    /* 数组 array indexed [min_attr .. max_attr] */     int32      *attr_widths;    /* 属性宽度 array indexed [min_attr .. max_attr] */     List       *lateral_vars;   /* 关系依赖的Vars/PHVs LATERAL Vars and PHVs referenced by rel */     Relids      lateral_referencers;    /*依赖该关系的Relids rels that reference me laterally */     List       *indexlist;      /* 该关系的IndexOptInfo链表 list of IndexOptInfo */     List       *statlist;       /* 统计信息链表 list of StatisticExtInfo */     BlockNumber pages;          /* 块数 size estimates derived from pg_class */     double      tuples;         /* 元组数 */     double      allvisfrac;     /* ? */     PlannerInfo *subroot;       /* 如为子查询,存储子查询的root if subquery */     List       *subplan_params; /* 如为子查询,存储子查询的参数 if subquery */     int         rel_parallel_workers;   /* 并行执行,需要多少个workers? wanted number of parallel workers */      /* Information about foreign tables and foreign joins */     //FWD相关信息     Oid         serverid;       /* identifies server for the table or join */     Oid         userid;         /* identifies user to check access as */     bool        useridiscurrent;    /* join is only valid for current user */     /* use "struct FdwRoutine" to avoid including fdwapi.h here */     struct FdwRoutine *fdwroutine;     void       *fdw_private;      /* cache space for remembering if we have proven this relation unique */     //已知的,可保证唯一的Relids链表     List       *unique_for_rels;    /* known unique for these other relid                                      * set(s) */     List       *non_unique_for_rels;    /* 已知的,不唯一的Relids链表 known not unique for these set(s) */      /* used by various scans and joins: */     List       *baserestrictinfo;   /* 如为基本关系,存储约束条件 RestrictInfo structures (if base rel) */     QualCost    baserestrictcost;   /* 解析约束表达式的成本? cost of evaluating the above */     Index       baserestrict_min_security;  /* 最低安全等级 min security_level found in                                              * baserestrictinfo */     List       *joininfo;       /* 连接语句的约束条件信息 RestrictInfo structures for join clauses                                  * involving this rel */     bool        has_eclass_joins;   /* 是否存在等价类连接? T means joininfo is incomplete */      /* used by partitionwise joins: */     bool        consider_partitionwise_join;    /* 分区? consider partitionwise                                                  * join paths? (if                                                  * partitioned rel) */     Relids      top_parent_relids;  /* Relids of topmost parents (if "other"                                      * rel) */      /* used for partitioned relations */     //分区表使用     PartitionScheme part_scheme;    /* 分区的schema Partitioning scheme. */     int         nparts;         /* 分区数 number of partitions */     struct PartitionBoundInfoData *boundinfo;   /* 分区边界信息 Partition bounds */     List       *partition_qual; /* 分区约束 partition constraint */     struct RelOptInfo **part_rels;  /* 分区的RelOptInfo数组 Array of RelOptInfos of partitions,                                      * stored in the same order of bounds */     List      **partexprs;      /* 非空分区键表达式 Non-nullable partition key expressions. */     List      **nullable_partexprs; /* 可为空的分区键表达式 Nullable partition key expressions. */     List       *partitioned_child_rels; /* RT Indexes链表 List of RT indexes. */ } RelOptInfo;

二、源码解读

make_one_rel
make_one_rel函数找出执行查询的所有可能访问路径,但不考虑上层的Non-SPJ操作,返回一个最上层的RelOptInfo.
make_one_rel函数分为两个阶段:生成扫描路径(set_base_rel_pathlists)和生成连接路径(make_rel_from_joinlist).
注:SPJ是指选择(Select)/投影(Project)/连接(Join),相对应的Non-SPJ操作是指Group分组/Sort排序等操作

 /*  * make_one_rel  *    Finds all possible access paths for executing a query, returning a  *    single rel that represents the join of all base rels in the query.  */ RelOptInfo * make_one_rel(PlannerInfo *root, List *joinlist) {     RelOptInfo *rel;     Index       rti;      /*      * Construct the all_baserels Relids set.      */     root->all_baserels = NULL;     for (rti = 1; rti < root->simple_rel_array_size; rti++)//遍历RelOptInfo     {         RelOptInfo *brel = root->simple_rel_array[rti];          /* there may be empty slots corresponding to non-baserel RTEs */         if (brel == NULL)             continue;          Assert(brel->relid == rti); /* sanity check on array */          /* ignore RTEs that are "other rels" */         if (brel->reloptkind != RELOPT_BASEREL)             continue;          root->all_baserels = bms_add_member(root->all_baserels, brel->relid);//添加到all_baserels遍历中     }      /* Mark base rels as to whether we care about fast-start plans */     //设置RelOptInfo的consider_param_startup变量,是否考量fast-start plans     set_base_rel_consider_startup(root);      /*      * Compute size estimates and consider_parallel flags for each base rel,      * then generate access paths.      */     set_base_rel_sizes(root);//估算Relation的Size并且设置consider_parallel标记     set_base_rel_pathlists(root);//生成Relation的扫描(访问)路径      /*      * Generate access paths for the entire join tree.      * 通过动态规划或遗传算法生成连接路径       */     rel = make_rel_from_joinlist(root, joinlist);      /*      * The result should join all and only the query's base rels.      */     Assert(bms_equal(rel->relids, root->all_baserels));     //返回最上层的RelOptInfo     return rel; }//-------------------------------------------------------- /*  * set_base_rel_consider_startup  *    Set the consider_[param_]startup flags for each base-relation entry.  *  * For the moment, we only deal with consider_param_startup here; because the  * logic for consider_startup is pretty trivial and is the same for every base  * relation, we just let build_simple_rel() initialize that flag correctly to  * start with.  If that logic ever gets more complicated it would probably  * be better to move it here.  */ static void set_base_rel_consider_startup(PlannerInfo *root) {     /*      * Since parameterized paths can only be used on the inside of a nestloop      * join plan, there is usually little value in considering fast-start      * plans for them.  However, for relations that are on the RHS of a SEMI      * or ANTI join, a fast-start plan can be useful because we're only going      * to care about fetching one tuple anyway.      *      * To minimize growth of planning time, we currently restrict this to      * cases where the RHS is a single base relation, not a join; there is no      * provision for consider_param_startup to get set at all on joinrels.      * Also we don't worry about appendrels.  costsize.c's costing rules for      * nestloop semi/antijoins don't consider such cases either.      */     ListCell   *lc;      foreach(lc, root->join_info_list)     {         SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);         int         varno;          if ((sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI) &&             bms_get_singleton_member(sjinfo->syn_righthand, &varno))         {             RelOptInfo *rel = find_base_rel(root, varno);              rel->consider_param_startup = true;         }     } }//-------------------------------------------------------- /*  * set_base_rel_sizes  *    Set the size estimates (rows and widths) for each base-relation entry.  *    Also determine whether to consider parallel paths for base relations.  *  * We do this in a separate pass over the base rels so that rowcount  * estimates are available for parameterized path generation, and also so  * that each rel's consider_parallel flag is set correctly before we begin to  * generate paths.  */ static void set_base_rel_sizes(PlannerInfo *root) {     Index       rti;      for (rti = 1; rti < root->simple_rel_array_size; rti++)//遍历RelOptInfo数组     {         RelOptInfo *rel = root->simple_rel_array[rti];         RangeTblEntry *rte;          /* there may be empty slots corresponding to non-baserel RTEs */         if (rel == NULL)             continue;          Assert(rel->relid == rti);  /* sanity check on array */          /* ignore RTEs that are "other rels" */         if (rel->reloptkind != RELOPT_BASEREL)             continue;          rte = root->simple_rte_array[rti];          /*          * If parallelism is allowable for this query in general, see whether          * it's allowable for this rel in particular.  We have to do this          * before set_rel_size(), because (a) if this rel is an inheritance          * parent, set_append_rel_size() will use and perhaps change the rel's          * consider_parallel flag, and (b) for some RTE types, set_rel_size()          * goes ahead and makes paths immediately.          */         if (root->glob->parallelModeOK)             set_rel_consider_parallel(root, rel, rte);          set_rel_size(root, rel, rti, rte);     } } /*  * set_rel_size  *    Set size estimates for a base relation  */ static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,              Index rti, RangeTblEntry *rte) {     if (rel->reloptkind == RELOPT_BASEREL &&         relation_excluded_by_constraints(root, rel, rte))     {         /*          * We proved we don't need to scan the rel via constraint exclusion,          * so set up a single dummy path for it.  Here we only check this for          * regular baserels; if it's an otherrel, CE was already checked in          * set_append_rel_size().          *          * In this case, we go ahead and set up the relation's path right away          * instead of leaving it for set_rel_pathlist to do.  This is because          * we don't have a convention for marking a rel as dummy except by          * assigning a dummy path to it.          */         set_dummy_rel_pathlist(rel);//     }     else if (rte->inh)//inherit table     {         /* It's an "append relation", process accordingly */         set_append_rel_size(root, rel, rti, rte);     }     else     {         switch (rel->rtekind)         {             case RTE_RELATION://数据表                 if (rte->relkind == RELKIND_FOREIGN_TABLE)//FDW                 {                     /* Foreign table */                     set_foreign_size(root, rel, rte);                 }                 else if (rte->relkind == RELKIND_PARTITIONED_TABLE)//分区表                 {                     /*                      * A partitioned table without any partitions is marked as                      * a dummy rel.                      */                     set_dummy_rel_pathlist(rel);                 }                 else if (rte->tablesample != NULL)//采样表                 {                     /* Sampled relation */                     set_tablesample_rel_size(root, rel, rte);                 }                 else                 {                     /* Plain relation */                     set_plain_rel_size(root, rel, rte);//常规的数据表                 }                 break;             case RTE_SUBQUERY://子查询                  /*                  * Subqueries don't support making a choice between                  * parameterized and unparameterized paths, so just go ahead                  * and build their paths immediately.                  */                 set_subquery_pathlist(root, rel, rti, rte);//生成子查询访问路径                 break;             case RTE_FUNCTION://FUNCTION                 set_function_size_estimates(root, rel);                 break;             case RTE_TABLEFUNC://TABLEFUNC                 set_tablefunc_size_estimates(root, rel);                 break;             case RTE_VALUES://VALUES                 set_values_size_estimates(root, rel);                 break;             case RTE_CTE://CTE                  /*                  * CTEs don't support making a choice between parameterized                  * and unparameterized paths, so just go ahead and build their                  * paths immediately.                  */                 if (rte->self_reference)                     set_worktable_pathlist(root, rel, rte);                 else                     set_cte_pathlist(root, rel, rte);                 break;             case RTE_NAMEDTUPLESTORE://NAMEDTUPLESTORE,命名元组存储                 set_namedtuplestore_pathlist(root, rel, rte);                 break;             default:                 elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);                 break;         }     }      /*      * We insist that all non-dummy rels have a nonzero rowcount estimate.      */     Assert(rel->rows > 0 || IS_DUMMY_REL(rel)); } //-------------------------------------------------------- /*  * set_base_rel_pathlists  *    Finds all paths available for scanning each base-relation entry.  *    Sequential scan and any available indices are considered.  *    Each useful path is attached to its relation's 'pathlist' field.  *  *    为每一个base rels找出所有可用的访问路径(顺序扫描和所有可用的索引都会考虑在内)  *    每一个可用的路径都会添加到pathlist链表中  *  */ static void set_base_rel_pathlists(PlannerInfo *root) {     Index       rti;      for (rti = 1; rti < root->simple_rel_array_size; rti++)//遍历RelOptInfo数组     {         RelOptInfo *rel = root->simple_rel_array[rti];          /* there may be empty slots corresponding to non-baserel RTEs */         if (rel == NULL)             continue;          Assert(rel->relid == rti);  /* sanity check on array */          /* ignore RTEs that are "other rels" */         if (rel->reloptkind != RELOPT_BASEREL)             continue;          set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);     } } /*  * set_rel_pathlist  *    Build access paths for a base relation  */ static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,                  Index rti, RangeTblEntry *rte) {     if (IS_DUMMY_REL(rel))     {         /* We already proved the relation empty, so nothing more to do */     }     else if (rte->inh)//inherit     {         /* It's an "append relation", process accordingly */         set_append_rel_pathlist(root, rel, rti, rte);     }     else//常规     {         switch (rel->rtekind)         {             case RTE_RELATION://数据表                 if (rte->relkind == RELKIND_FOREIGN_TABLE)//FDW                 {                     /* Foreign table */                     set_foreign_pathlist(root, rel, rte);                 }                 else if (rte->tablesample != NULL)//采样表                 {                     /* Sampled relation */                     set_tablesample_rel_pathlist(root, rel, rte);                 }                 else//常规数据表                 {                     /* Plain relation */                     set_plain_rel_pathlist(root, rel, rte);                 }                 break;             case RTE_SUBQUERY://子查询                 /* Subquery --- 已在set_rel_size处理,fully handled during set_rel_size */                 break;             case RTE_FUNCTION:                 /* RangeFunction */                 set_function_pathlist(root, rel, rte);                 break;             case RTE_TABLEFUNC:                 /* Table Function */                 set_tablefunc_pathlist(root, rel, rte);                 break;             case RTE_VALUES:                 /* Values list */                 set_values_pathlist(root, rel, rte);                 break;             case RTE_CTE:                 /* CTE reference --- fully handled during set_rel_size */                 break;             case RTE_NAMEDTUPLESTORE:                 /* tuplestore reference --- fully handled during set_rel_size */                 break;             default:                 elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);                 break;         }     }      /*      * If this is a baserel, we should normally consider gathering any partial      * paths we may have created for it.      *      * However, if this is an inheritance child, skip it.  Otherwise, we could      * end up with a very large number of gather nodes, each trying to grab      * its own pool of workers.  Instead, we'll consider gathering partial      * paths for the parent appendrel.      *      * Also, if this is the topmost scan/join rel (that is, the only baserel),      * we postpone this until the final scan/join targelist is available (see      * grouping_planner).      */     if (rel->reloptkind == RELOPT_BASEREL &&         bms_membership(root->all_baserels) != BMS_SINGLETON)         generate_gather_paths(root, rel, false);      /*      * Allow a plugin to editorialize on the set of Paths for this base      * relation.  It could add new paths (such as CustomPaths) by calling      * add_path(), or delete or modify paths added by the core code.      */     if (set_rel_pathlist_hook)//钩子函数         (*set_rel_pathlist_hook) (root, rel, rti, rte);      /* Now find the cheapest of the paths for this rel */     set_cheapest(rel);//找出代价最低的访问路径  #ifdef OPTIMIZER_DEBUG     debug_print_rel(root, rel); #endif }//------------------------------------------------------------ /*  * make_rel_from_joinlist  *    Build access paths using a "joinlist" to guide the join path search.  *  *    根据joinlist构建连接访问路径,joinlist是函数deconstruct_jointree函数的返回  *  * See comments for deconstruct_jointree() for definition of the joinlist  * data structure.  */ static RelOptInfo * make_rel_from_joinlist(PlannerInfo *root, List *joinlist) {     int         levels_needed;     List       *initial_rels;     ListCell   *jl;      /*      * Count the number of child joinlist nodes.  This is the depth of the      * dynamic-programming algorithm we must employ to consider all ways of      * joining the child nodes.      */     levels_needed = list_length(joinlist);//joinlist链表长度      if (levels_needed <= 0)         return NULL;            /* nothing to do? */      /*      * Construct a list of rels corresponding to the child joinlist nodes.      * This may contain both base rels and rels constructed according to      * sub-joinlists.      */     initial_rels = NIL;     foreach(jl, joinlist)//遍历链表     {         Node       *jlnode = (Node *) lfirst(jl);         RelOptInfo *thisrel;          if (IsA(jlnode, RangeTblRef))//RTR         {             int         varno = ((RangeTblRef *) jlnode)->rtindex;              thisrel = find_base_rel(root, varno);         }         else if (IsA(jlnode, List))         {             /* Recurse to handle subproblem */             thisrel = make_rel_from_joinlist(root, (List *) jlnode);//递归调用         }         else         {             elog(ERROR, "unrecognized joinlist node type: %d",                  (int) nodeTag(jlnode));             thisrel = NULL;     /* keep compiler quiet */         }          initial_rels = lappend(initial_rels, thisrel);//加入初始化链表中     }      if (levels_needed == 1)     {         /*          * Single joinlist node, so we're done.          */         return (RelOptInfo *) linitial(initial_rels);     }     else     {         /*          * Consider the different orders in which we could join the rels,          * using a plugin, GEQO, or the regular join search code.          *          * We put the initial_rels list into a PlannerInfo field because          * has_legal_joinclause() needs to look at it (ugly :-().          */         root->initial_rels = initial_rels;          if (join_search_hook)//钩子函数             return (*join_search_hook) (root, levels_needed, initial_rels);         else if (enable_geqo && levels_needed >= geqo_threshold)             return geqo(root, levels_needed, initial_rels);//通过遗传算法构建连接访问路径         else             return standard_join_search(root, levels_needed, initial_rels);//通过动态规划算法构建连接路径     } }

到此,关于"PostgreSQL中query_planner中主计划函数make_one_rel的主实现逻辑是什么"的学习就结束了,希望能够解决大家的疑惑。理论与实践的搭配能更好的帮助大家学习,快去试试吧!若想继续学习更多相关知识,请继续关注网站,小编会继续努力为大家带来更多实用的文章!

0