视频1 视频21 视频41 视频61 视频文章1 视频文章21 视频文章41 视频文章61 推荐1 推荐3 推荐5 推荐7 推荐9 推荐11 推荐13 推荐15 推荐17 推荐19 推荐21 推荐23 推荐25 推荐27 推荐29 推荐31 推荐33 推荐35 推荐37 推荐39 推荐41 推荐43 推荐45 推荐47 推荐49 关键词1 关键词101 关键词201 关键词301 关键词401 关键词501 关键词601 关键词701 关键词801 关键词901 关键词1001 关键词1101 关键词1201 关键词1301 关键词1401 关键词1501 关键词1601 关键词1701 关键词1801 关键词1901 视频扩展1 视频扩展6 视频扩展11 视频扩展16 文章1 文章201 文章401 文章601 文章801 文章1001 资讯1 资讯501 资讯1001 资讯1501 标签1 标签501 标签1001 关键词1 关键词501 关键词1001 关键词1501 专题2001
检查数据倾斜分布
2020-11-09 14:38:49 责编:小采
文档

从传统数据库迁移到GP中一个重要的且经常被开发人员忽略的概念是数据分布,没有良好的设计表的分布键会导致严重的性能问题,以下函数将给开发人员及DBA检测一个表的数据倾斜情况。 -- Function: gpmg.data_skew(character varying) -- DROP FUNCTION gpmg.da


从传统数据库迁移到GP中一个重要的且经常被开发人员忽略的概念是数据分布,没有良好的设计表的分布键会导致严重的性能问题,以下函数将给开发人员及DBA检测一个表的数据倾斜情况。
-- Function: gpmg.data_skew(character varying)
 
-- DROP FUNCTION gpmg.data_skew(character varying);
 
CREATE OR REPLACE FUNCTION gpmg.data_skew(tablename character varying)
 RETURNS text AS
$BODY$
--2014-05-26,Gtlions,收集和统计数据倾斜情况
declare
 v_func character varying(200)='gpmg.data_skew()';
 v_begin_time timestamp;
 v_end_time timestamp;
 v_status int=0;
 v_msg text='Done.';
 v_record record;
 
 v_id integer;
 v_rq timestamp; 
 v_segs integer=;
 v_totalnums bigint=0;
 v_maxskew numeric=0.0;
 v_minskew numeric=0.0;
 v_maxskew_seg varchar(20);
 v_minskew_seg varchar(20);
 v_maxrows bigint=0;
 v_minrows bigint=0; 
 v_result varchar(2000);
 
begin
 v_id=nextval('gpmg.commonseq');
 v_rq=now();
 v_begin_time=clock_timestamp();
 v_result = 'GP hava ';
 select into v_segs count(*) segs from gp_segment_configuration where role='p' and content<>-1;
 v_result = v_result||v_segs||' instances, Standard skew is '||1.0/v_segs||'. ';
 -- bg1 segid, bg2 节点记录数量
 execute 'insert into gpmg.commontab(seq,tabname,bg1,bg2) select '||v_id||','''||$1||''',gp_segment_id,count(*) segrownums from '||$1||' group by rollup(( gp_segment_id)) order by gp_segment_id';
 select into v_segs,v_totalnums v_segs,max(bg2) from gpmg.commontab where seq=v_id and tabname=$1;
 --nm1 标准倾斜率, nm2 节点倾斜率, nm3 标准-节点倾斜率绝对值
 update gpmg.commontab set nm1=1::numeric/v_segs,nm2=bg2::numeric/v_totalnums,nm3=abs(1::numeric/v_segs-bg2::numeric/v_totalnums) where seq=v_id and tabname=$1;
 select into v_maxskew,v_minskew max(nm2),min(nm2) from gpmg.commontab where seq=v_id and tabname=$1 and bg1 is not null;
 
 select into v_maxskew_seg hostname from gp_segment_configuration where role='p' and content in (select bg1 from gpmg.commontab where seq=v_id and tabname=$1 and bg1 is not null and nm2=v_maxskew limit 1);
 select into v_minskew_seg hostname from gp_segment_configuration where role='p' and content in (select bg1 from gpmg.commontab where seq=v_id and tabname=$1 and bg1 is not null and nm2=v_minskew limit 1);
 
 select into v_maxrows bg2 from gpmg.commontab where seq=v_id and tabname=$1 and bg1 is not null and nm2=v_maxskew limit 1;
 select into v_minrows bg2 from gpmg.commontab where seq=v_id and tabname=$1 and bg1 is not null and nm2=v_minskew limit 1;
 
 v_result =v_result ||'You Table ['||$1||'] skew info: [table_totalrows:'||v_totalnums||', maxskew:seg-'||v_maxskew_seg||', rows-'||v_maxrows||' '||v_maxskew||', minskew:seg-'||v_minskew_seg||', rows-'||v_minrows||' '||v_minskew||']';
 delete from gpmg.commontab where seq=v_id and tabname=$1;
 return v_result;
 v_end_time=clock_timestamp();
end;
$BODY$
 LANGUAGE plpgsql VOLATILE;
ALTER FUNCTION gpmg.data_skew(character varying)
 OWNER TO gpadmin;
GRANT EXECUTE ON FUNCTION gpmg.data_skew(character varying) TO public;
GRANT EXECUTE ON FUNCTION gpmg.data_skew(character varying) TO gpadmin;

bigdatagp=# select gpmg.data_skew('gpmg.manager_table');
 data_skew 
 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------
 GP hava  instances, Standard skew is 0.01562500000000000000. You Table [gpmg.manager_table] skew info: [table_totalrows:83, maxskew:seg-sdw16, rows-3 0.036144578313
25301205, minskew:seg-sdw2, rows-1 0.01204819277108433735]
(1 row)
 
bigdatagp=# select gpmg.data_skew('gpmg.func_log');
 data_skew 
 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------
 GP hava  instances, Standard skew is 0.01562500000000000000. You Table [gpmg.func_log] skew info: [table_totalrows:53708, maxskew:seg-sdw10, rows-907 0.016887614508
08073285, minskew:seg-sdw7, rows-773 0.014392169211290683]
(1 row)
2014-10-14 09:53:00


-EOF-

下载本文
显示全文
专题