Check for skew in Greenplum

Check for skew

a) OS-level

$gpssh –f config/seg_hosts df –h | grep data

b) Segment Level

select 'facts' as "Table Name",max(c) as "Max Seg Rows", min(c) as "Min Seg Rows", (max(c)-min(c))*100.0/max(c) as "Percentage Difference Between Max & Min" from (SELECT count(*) c, gp_segment_id from facts group by 2) as a;

c) Segment Level - simple query

select gp_segment_id, count(*) from table_name group by gp_segment_id;

4) file level

gpperfmon=# CREATE OR REPLACE FUNCTION fn_create_db_files() RETURNS void AS

gpperfmon-# $

gpperfmon$# DECLARE

gpperfmon$# v_function_name text := 'fn_create_db_files';

gpperfmon$# v_location int;

gpperfmon$# v_sql text;

gpperfmon$# v_db_oid text;

gpperfmon$# v_num_segments numeric;

gpperfmon$# v_skew_amount numeric;

gpperfmon$# BEGIN

gpperfmon$# v_location := 1000;

gpperfmon$# SELECT oid INTO v_db_oid

gpperfmon$# FROM pg_database

gpperfmon$# WHERE datname = current_database();

gpperfmon$#

gpperfmon$# v_location := 2000;

gpperfmon$# v_sql := 'DROP VIEW IF EXISTS vw_file_skew';

gpperfmon$#

gpperfmon$# v_location := 2100;

gpperfmon$# EXECUTE v_sql;

gpperfmon$#

gpperfmon$# v_location := 2200;

gpperfmon$# v_sql := 'DROP EXTERNAL TABLE IF EXISTS db_files';

gpperfmon$#

gpperfmon$# v_location := 2300;

gpperfmon$# EXECUTE v_sql;

gpperfmon$#

gpperfmon$# v_location := 3000;

gpperfmon$# v_sql := 'CREATE EXTERNAL WEB TABLE db_files ' ||

gpperfmon$# '(segment_id int, relfilenode text, filename text, ' ||

gpperfmon$# 'size numeric) ' ||

gpperfmon$# 'execute E''ls -l $GP_SEG_DATADIR/base/' || v_db_oid ||

gpperfmon$# ' | ' ||

gpperfmon$# 'grep gpadmin | ' ||

gpperfmon$# E'awk {''''print ENVIRON["GP_SEGMENT_ID"] "\\t" $9 "\\t" ' ||

gpperfmon$# 'ENVIRON["GP_SEG_DATADIR"] "/' || v_db_oid ||

gpperfmon$# E'/" $9 "\\t" $5''''}'' on all ' || 'format ''text''';

gpperfmon$#

gpperfmon$# v_location := 3100;

gpperfmon$# EXECUTE v_sql;

gpperfmon$#

gpperfmon$# v_location := 4000;

gpperfmon$# SELECT count(*) INTO v_num_segments

gpperfmon$# FROM gp_segment_configuration

gpperfmon$# WHERE preferred_role = 'p'

gpperfmon$# AND content >= 0;

gpperfmon$#

gpperfmon$# v_location := 4100;

gpperfmon$# v_skew_amount := 1.2*(1/v_num_segments);

gpperfmon$#

gpperfmon$# v_location := 4200;

gpperfmon$# v_sql := 'CREATE OR REPLACE VIEW vw_file_skew AS ' ||

gpperfmon$# 'SELECT schema_name, ' ||

gpperfmon$# 'table_name, ' ||

gpperfmon$# 'max(size)/sum(size) as largest_segment_percentage, ' ||

gpperfmon$# 'sum(size) as total_size ' ||

gpperfmon$# 'FROM ( ' ||

gpperfmon$# 'SELECT n.nspname AS schema_name, ' ||

gpperfmon$# ' c.relname AS table_name, ' ||

gpperfmon$# ' sum(db.size) as size ' ||

gpperfmon$# ' FROM db_files db ' ||

gpperfmon$# ' JOIN pg_class c ON ' ||

gpperfmon$# ' split_part(db.relfilenode, ''.'', 1) = c.relfilenode ' ||

gpperfmon$# ' JOIN pg_namespace n ON c.relnamespace = n.oid ' ||

gpperfmon$# ' WHERE c.relkind = ''r'' ' ||

gpperfmon$# ' GROUP BY n.nspname, c.relname, db.segment_id ' ||

gpperfmon$# ') as sub ' ||

gpperfmon$# 'GROUP BY schema_name, table_name ' ||

gpperfmon$# 'HAVING sum(size) > 0 and max(size)/sum(size) > ' ||

gpperfmon$# v_skew_amount::text || ' ' ||

gpperfmon$# 'ORDER BY largest_segment_percentage DESC, schema_name, ' ||

gpperfmon$# 'table_name';

gpperfmon$#

gpperfmon$# v_location := 4300;

gpperfmon$# EXECUTE v_sql;

gpperfmon$#

gpperfmon$# EXCEPTION

gpperfmon$# WHEN OTHERS THEN

gpperfmon$# RAISE EXCEPTION '(%:%:%)', v_function_name, v_location, sqlerrm;

gpperfmon$# END;

gpperfmon$# $

gpperfmon-# language plpgsql;

CREATE FUNCTION

gpperfmon=# SELECT fn_create_db_files();

NOTICE: view "vw_file_skew" does not exist, skipping

CONTEXT: SQL statement "DROP VIEW IF EXISTS vw_file_skew"

PL/pgSQL function "fn_create_db_files" line 18 at execute statement

NOTICE: table "db_files" does not exist, skipping

CONTEXT: SQL statement "DROP EXTERNAL TABLE IF EXISTS db_files"

PL/pgSQL function "fn_create_db_files" line 24 at execute statement

fn_create_db_files

--------------------

(1 row)

gpperfmon=# SELECT * FROM vw_file_skew ORDER BY 3 DESC;

schema_name | table_name | largest_segment_percentage | total_size

-------------+------------+----------------------------+------------

(0 rows)

gpperfmon=#