PostgreSQL 源码解读(193)- 查询#109(排序#2 - ExecSort)
发表于:2025-11-07 作者:千家信息网编辑
千家信息网最后更新 2025年11月07日,本节继续介绍排序的实现,上一节介绍了ExecInitSort,本节主要介绍排序的实现函数ExecSort.一、数据结构SortState排序运行期状态信息/* ---------------- *
千家信息网最后更新 2025年11月07日PostgreSQL 源码解读(193)- 查询#109(排序#2 - ExecSort)
本节继续介绍排序的实现,上一节介绍了ExecInitSort,本节主要介绍排序的实现函数ExecSort.
一、数据结构
SortState
排序运行期状态信息
/* ---------------- * SortState information * 排序运行期状态信息 * ---------------- */typedef struct SortState{ //基类 ScanState ss; /* its first field is NodeTag */ //是否需要随机访问排序输出? bool randomAccess; /* need random access to sort output? */ //结果集是否存在边界? bool bounded; /* is the result set bounded? */ //如存在边界,需要多少个元组? int64 bound; /* if bounded, how many tuples are needed */ //是否已完成排序? bool sort_Done; /* sort completed yet? */ //是否使用有界值? bool bounded_Done; /* value of bounded we did the sort with */ //使用的有界值? int64 bound_Done; /* value of bound we did the sort with */ //tuplesort.c的私有状态 void *tuplesortstate; /* private state of tuplesort.c */ //是否worker? bool am_worker; /* are we a worker? */ //每个worker对应一个条目 SharedSortInfo *shared_info; /* one entry per worker */} SortState;/* ---------------- * Shared memory container for per-worker sort information * per-worker排序信息的共享内存容器 * ---------------- */typedef struct SharedSortInfo{ //worker个数? int num_workers; //排序机制 TuplesortInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];} SharedSortInfo;TuplesortInstrumentation
报告排序统计的数据结构.
/* * Data structures for reporting sort statistics. Note that * TuplesortInstrumentation can't contain any pointers because we * sometimes put it in shared memory. * 报告排序统计的数据结构. * 注意TuplesortInstrumentation不能包含指针因为有时候会把该结构体放在共享内存中. */typedef enum{ SORT_TYPE_STILL_IN_PROGRESS = 0,//仍然在排序中 SORT_TYPE_TOP_N_HEAPSORT,//TOP N 堆排序 SORT_TYPE_QUICKSORT,//快速排序 SORT_TYPE_EXTERNAL_SORT,//外部排序 SORT_TYPE_EXTERNAL_MERGE//外部排序后的合并} TuplesortMethod;//排序方法typedef enum{ SORT_SPACE_TYPE_DISK,//需要用上磁盘 SORT_SPACE_TYPE_MEMORY//使用内存} TuplesortSpaceType;typedef struct TuplesortInstrumentation{ //使用的排序算法 TuplesortMethod sortMethod; /* sort algorithm used */ //排序使用空间类型 TuplesortSpaceType spaceType; /* type of space spaceUsed represents */ //空间消耗(以K为单位) long spaceUsed; /* space consumption, in kB */} TuplesortInstrumentation;二、源码解读
ExecSort使用tuplesort排序从outer子树节点获取的元组,在内存或者临时文件中缓存结果.在初次调用后,后续的每次调用返回一行.
实现逻辑不复杂,从outer plan中获取所有元组,调用tuplesort进行排序,如work_mem大小足够则在内存中存储否则在磁盘中使用临时文件存储.
/* ---------------------------------------------------------------- * ExecSort * * Sorts tuples from the outer subtree of the node using tuplesort, * which saves the results in a temporary file or memory. After the * initial call, returns a tuple from the file with each call. * 使用tuplesort排序outer子树节点获取的元组, * 在内存或者临时文件中缓存结果. * 在初次调用后,后续的每次调用返回一行. * * Conditions: * -- none. * -- 无 * * Initial States: * -- the outer child is prepared to return the first tuple. * 初始状态: * -- outer子节点已准备返回第一个元组. * ---------------------------------------------------------------- */static TupleTableSlot *ExecSort(PlanState *pstate){ //排序运行状态 SortState *node = castNode(SortState, pstate); EState *estate;//运行状态 ScanDirection dir;//扫描方向 Tuplesortstate *tuplesortstate;//元组排序状态 TupleTableSlot *slot;//元组slot CHECK_FOR_INTERRUPTS(); /* * get state info from node * 从节点中获取运行状态 */ SO1_printf("ExecSort: %s\n", "entering routine"); estate = node->ss.ps.state; dir = estate->es_direction; tuplesortstate = (Tuplesortstate *) node->tuplesortstate; /* * If first time through, read all tuples from outer plan and pass them to * tuplesort.c. Subsequent calls just fetch tuples from tuplesort. * 如果第一轮,从outer plan中读取所有元组并传递给tuplesort.c. * 后续的调用只是从tuplesort中提取元组. */ if (!node->sort_Done) { //-------------- 首次,需要进行排序 Sort *plannode = (Sort *) node->ss.ps.plan; PlanState *outerNode; TupleDesc tupDesc; SO1_printf("ExecSort: %s\n", "sorting subplan"); /* * Want to scan subplan in the forward direction while creating the * sorted data. * 在创建结果排序数据时,向前扫描子计划 */ //设置扫描方向 estate->es_direction = ForwardScanDirection; /* * Initialize tuplesort module. * 初始化tuplesort模块 */ SO1_printf("ExecSort: %s\n", "calling tuplesort_begin"); //获取outer plan运行状态 outerNode = outerPlanState(node); //获取结果类型(元组描述符) tupDesc = ExecGetResultType(outerNode); //执行tuplesort_begin_heap tuplesortstate = tuplesort_begin_heap(tupDesc, plannode->numCols, plannode->sortColIdx, plannode->sortOperators, plannode->collations, plannode->nullsFirst, work_mem, NULL, node->randomAccess); if (node->bounded) //节点有界,则设置边界 tuplesort_set_bound(tuplesortstate, node->bound); node->tuplesortstate = (void *) tuplesortstate; /* * Scan the subplan and feed all the tuples to tuplesort. * 扫描子计划并把所有元组都发给tuplesort */ for (;;) { //从outer plan中获取元组 slot = ExecProcNode(outerNode); if (TupIsNull(slot)) break;//直至全部获取完毕 //排序 tuplesort_puttupleslot(tuplesortstate, slot); } /* * Complete the sort. * 完成排序 */ tuplesort_performsort(tuplesortstate); /* * restore to user specified direction * 恢复用户指定的扫描方向 */ estate->es_direction = dir; /* * finally set the sorted flag to true * 最后,设置已排序标记为T */ node->sort_Done = true; node->bounded_Done = node->bounded; node->bound_Done = node->bound; if (node->shared_info && node->am_worker) { TuplesortInstrumentation *si; Assert(IsParallelWorker()); Assert(ParallelWorkerNumber <= node->shared_info->num_workers); si = &node->shared_info->sinstrument[ParallelWorkerNumber]; tuplesort_get_stats(tuplesortstate, si); } SO1_printf("ExecSort: %s\n", "sorting done"); } SO1_printf("ExecSort: %s\n", "retrieving tuple from tuplesort"); /* * Get the first or next tuple from tuplesort. Returns NULL if no more * tuples. Note that we only rely on slot tuple remaining valid until the * next fetch from the tuplesort. * 在tuplesort中获取第一个/下一个元组. * 如无更多的元组,返回NULL. * 注意我们会一直保持存储在slot中的元组可用直至从tuplesort中提取下一个元组. */ slot = node->ss.ps.ps_ResultTupleSlot; (void) tuplesort_gettupleslot(tuplesortstate, ScanDirectionIsForward(dir), false, slot, NULL); return slot;}三、跟踪分析
创建数据表,插入测试数据
drop table if exists t_sort;create table t_sort(bh varchar(20),c1 int,c2 int,c3 int,c4 int,c5 int,c6 int);insert into t_sort select 'GZ01',col,col,col,col,col,col from generate_series(1,100000) as col;testdb=# explain (verbose,analyze) select * from t_sort order by c1,c2; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------- Sort (cost=8172.55..8308.71 rows=54464 width=82) (actual time=173.447..225.213 rows=100000 loops=1) Output: bh, c1, c2, c3, c4, c5, c6 Sort Key: t_sort.c1, t_sort.c2 Sort Method: external merge Disk: 4120kB -> Seq Scan on public.t_sort (cost=0.00..1280.64 rows=54464 width=82) (actual time=0.092..55.257 rows=100000 loops=1) Output: bh, c1, c2, c3, c4, c5, c6 Planning Time: 4.648 ms Execution Time: 238.227 ms(8 rows)测试脚本
testdb=# select * from t_sort order by c1,c2;跟踪调试
(gdb) b ExecSortBreakpoint 1 at 0x711909: file nodeSort.c, line 42.(gdb) cContinuing.Breakpoint 1, ExecSort (pstate=0x17a29b0) at nodeSort.c:4242 SortState *node = castNode(SortState, pstate);(gdb)输入参数
(gdb) p *pstate$1 = {type = T_SortState, plan = 0x179e540, state = 0x17a2798, ExecProcNode = 0x7118fd , ExecProcNodeReal = 0x7118fd , instrument = 0x0, worker_instrument = 0x0, worker_jit_instrument = 0x0, qual = 0x0, lefttree = 0x17a2ac8, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x17a3900, ps_ExprContext = 0x0, ps_ProjInfo = 0x0, scandesc = 0x17a2e50} 左树节点是SeqScan,亦即outer node
(gdb) p *pstate->lefttree$2 = {type = T_SeqScanState, plan = 0x17a8960, state = 0x17a2798, ExecProcNode = 0x6e0670 , ExecProcNodeReal = 0x710589 , instrument = 0x0, worker_instrument = 0x0, worker_jit_instrument = 0x0, qual = 0x0, lefttree = 0x0, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x17a3268, ps_ExprContext = 0x17a2be0, ps_ProjInfo = 0x0, scandesc = 0x7faf6c0ec160} 获取节点的运行状态&扫描方向
(gdb) n48 CHECK_FOR_INTERRUPTS();(gdb) 56 estate = node->ss.ps.state;(gdb) 57 dir = estate->es_direction;(gdb) 58 tuplesortstate = (Tuplesortstate *) node->tuplesortstate;(gdb) (gdb) n65 if (!node->sort_Done)(gdb) (gdb) p *estate$3 = {type = T_EState, es_direction = ForwardScanDirection, es_snapshot = 0x17698a0, es_crosscheck_snapshot = 0x0, es_range_table = 0x17a8e58, es_plannedstmt = 0x16a7e80, es_sourceText = 0x16a6d78 "select * from t_sort order by c1,c2;", es_junkFilter = 0x0, es_output_cid = 0, es_result_relations = 0x0, es_num_result_relations = 0, es_result_relation_info = 0x0, es_root_result_relations = 0x0, es_num_root_result_relations = 0, es_tuple_routing_result_relations = 0x0, es_trig_target_relations = 0x0, es_trig_tuple_slot = 0x0, es_trig_oldtup_slot = 0x0, es_trig_newtup_slot = 0x0, es_param_list_info = 0x0, es_param_exec_vals = 0x0, es_queryEnv = 0x0, es_query_cxt = 0x17a2680, es_tupleTable = 0x17a2e18, es_rowMarks = 0x0, es_processed = 0, es_lastoid = 0, es_top_eflags = 16, es_instrument = 0, es_finished = false, es_exprcontexts = 0x17a2ca0, es_subplanstates = 0x0, es_auxmodifytables = 0x0, es_per_tuple_exprcontext = 0x0, es_epqTuple = 0x0, es_epqTupleSet = 0x0, es_epqScanDone = 0x0, es_use_parallel_mode = false, es_query_dsa = 0x0, es_jit_flags = 0, es_jit = 0x0, es_jit_worker_instr = 0x0}(gdb) p dir$4 = ForwardScanDirection(gdb) p *tuplesortstateCannot access memory at address 0x0未完成排序,进入排序逻辑,设置扫描方向
(gdb) n67 Sort *plannode = (Sort *) node->ss.ps.plan;(gdb) n78 estate->es_direction = ForwardScanDirection;(gdb) 86 outerNode = outerPlanState(node);(gdb) p *plannode$5 = {plan = {type = T_Sort, startup_cost = 12434.82023721841, total_cost = 12684.82023721841, plan_rows = 100000, plan_width = 29, parallel_aware = false, parallel_safe = true, plan_node_id = 0, targetlist = 0x17a8fc8, qual = 0x0, lefttree = 0x17a8960, righttree = 0x0, initPlan = 0x0, extParam = 0x0, allParam = 0x0}, numCols = 2, sortColIdx = 0x17a89f8, sortOperators = 0x17a8a18, collations = 0x17a8a38, nullsFirst = 0x17a8a58}(gdb)获取结果类型并调用tuplesort_begin_heap获取排序状态tuplesortstate
(gdb) n87 tupDesc = ExecGetResultType(outerNode);(gdb) 96 NULL, node->randomAccess);(gdb) p *tupDesc$6 = {natts = 7, tdtypeid = 2249, tdtypmod = -1, tdhasoid = false, tdrefcount = -1, constr = 0x0, attrs = 0x17a2e70}(gdb) n89 tuplesortstate = tuplesort_begin_heap(tupDesc,(gdb) 97 if (node->bounded)(gdb) 99 node->tuplesortstate = (void *) tuplesortstate;(gdb)循环获取元组并调用tuplesort_puttupleslot(下一节介绍),放到待排序中
(gdb) n107 slot = ExecProcNode(outerNode);(gdb) 109 if (TupIsNull(slot))(gdb) 112 tuplesort_puttupleslot(tuplesortstate, slot);(gdb) 113 }(gdb) 107 slot = ExecProcNode(outerNode);(gdb)设置断点,完成循环,并执行tuplesort_performsort(下一节介绍)完成排序
(gdb) b nodeSort.c:118Breakpoint 2 at 0x711a72: file nodeSort.c, line 118.(gdb) cContinuing.Breakpoint 2, ExecSort (pstate=0x17a29b0) at nodeSort.c:118118 tuplesort_performsort(tuplesortstate);(gdb)设置相关其他标记
(gdb) n123 estate->es_direction = dir;(gdb) 128 node->sort_Done = true;(gdb) 129 node->bounded_Done = node->bounded;(gdb) 130 node->bound_Done = node->bound;(gdb) 131 if (node->shared_info && node->am_worker)(gdb) 151 slot = node->ss.ps.ps_ResultTupleSlot;(gdb) p *node$7 = {ss = {ps = {type = T_SortState, plan = 0x179e540, state = 0x17a2798, ExecProcNode = 0x7118fd , ExecProcNodeReal = 0x7118fd , instrument = 0x0, worker_instrument = 0x0, worker_jit_instrument = 0x0, qual = 0x0, lefttree = 0x17a2ac8, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x17a3900, ps_ExprContext = 0x0, ps_ProjInfo = 0x0, scandesc = 0x17a2e50}, ss_currentRelation = 0x0, ss_currentScanDesc = 0x0, ss_ScanTupleSlot = 0x17a33a8}, randomAccess = false, bounded = false, bound = 0, sort_Done = true, bounded_Done = false, bound_Done = 0, tuplesortstate = 0x17ac7b8, am_worker = false, shared_info = 0x0}(gdb) 完成排序,获取元组
(gdb) p node->tuplesortstate$8 = (void *) 0x17ac7b8(gdb) n152 (void) tuplesort_gettupleslot(tuplesortstate,(gdb) 155 return slot;(gdb) p *slot$9 = {type = T_TupleTableSlot, tts_isempty = false, tts_shouldFree = false, tts_shouldFreeMin = false, tts_slow = false, tts_tuple = 0x17a3940, tts_tupleDescriptor = 0x17a34e8, tts_mcxt = 0x17a2680, tts_buffer = 0, tts_nvalid = 0, tts_values = 0x17a3960, tts_isnull = 0x17a3998, tts_mintuple = 0x1bc07f8, tts_minhdr = {t_len = 56, t_self = {ip_blkid = { bi_hi = 0, bi_lo = 0}, ip_posid = 0}, t_tableOid = 0, t_data = 0x1bc07f0}, tts_off = 0, tts_fixedTupleDescriptor = true}(gdb)下一次调用,直接提取元组
(gdb) cContinuing.Breakpoint 1, ExecSort (pstate=0x17a29b0) at nodeSort.c:4242 SortState *node = castNode(SortState, pstate);(gdb) n48 CHECK_FOR_INTERRUPTS();(gdb) 56 estate = node->ss.ps.state;(gdb) 57 dir = estate->es_direction;(gdb) 58 tuplesortstate = (Tuplesortstate *) node->tuplesortstate;(gdb) 65 if (!node->sort_Done)(gdb) 151 slot = node->ss.ps.ps_ResultTupleSlot;(gdb) 152 (void) tuplesort_gettupleslot(tuplesortstate,(gdb)四、参考资料
N/A
排序
状态
节点
内存
数据
结果
方向
运行
结构
信息
数据结构
文件
类型
边界
存储
一行
报告
标记
磁盘
空间
数据库的安全要保护哪些东西
数据库安全各自的含义是什么
生产安全数据库录入
数据库的安全性及管理
数据库安全策略包含哪些
海淀数据库安全审计系统
建立农村房屋安全信息数据库
易用的数据库客户端支持安全管理
连接数据库失败ssl安全错误
数据库的锁怎样保障安全
魔兽怀旧服开放哪些服务器
数据库漏洞扫描原理
软件开发属于工科还是理科
在合肥软件开发公司
磨憨网络安全教育
赤峰大晟互联网科技有限公司
自贡软件开发销售价格
万方数据库引文核心
自己搭建云服务器
铜陵ios软件开发招聘信息
湖南省网络安全应急指挥中心
达梦数据库授权文件在哪
网神网络安全硬件厂商
北京水费网上缴费软件开发团队
校园网络安全论文700字
平山软件开发
医院网络安全责任追究
网络安全防范小贴士
浙大数据库在线作业答案
derby数据库工具
数据库hav
专业的网络安全上市公司
服务器 端口映射
网络安全思维导图一年级
武汉大学网络安全学院宿舍
网络安全教育手抄报内容清晰
网络安全是电脑安全的一部分
软件开发适合大专学吗
vf数据库文件作用
吕梁软件开发设计