Check for skew in Greenplum
Check for skew
a) OS-level
$gpssh –f config/seg_hosts df –h | grep data
b) Segment Level
select 'facts' as "Table Name",max(c) as "Max Seg Rows", min(c) as "Min Seg Rows", (max(c)-min(c))*100.0/max(c) as "Percentage Difference Between Max & Min" from (SELECT count(*) c, gp_segment_id from facts group by 2) as a;
c) Segment Level - simple query
select gp_segment_id, count(*) from table_name group by gp_segment_id;
4) file level
gpperfmon=# CREATE OR REPLACE FUNCTION fn_create_db_files() RETURNS void AS
gpperfmon-# $
gpperfmon$# DECLARE
gpperfmon$# v_function_name text := 'fn_create_db_files';
gpperfmon$# v_location int;
gpperfmon$# v_sql text;
gpperfmon$# v_db_oid text;
gpperfmon$# v_num_segments numeric;
gpperfmon$# v_skew_amount numeric;
gpperfmon$# BEGIN
gpperfmon$# v_location := 1000;
gpperfmon$# SELECT oid INTO v_db_oid
gpperfmon$# FROM pg_database
gpperfmon$# WHERE datname = current_database();
gpperfmon$#
gpperfmon$# v_location := 2000;
gpperfmon$# v_sql := 'DROP VIEW IF EXISTS vw_file_skew';
gpperfmon$#
gpperfmon$# v_location := 2100;
gpperfmon$# EXECUTE v_sql;
gpperfmon$#
gpperfmon$# v_location := 2200;
gpperfmon$# v_sql := 'DROP EXTERNAL TABLE IF EXISTS db_files';
gpperfmon$#
gpperfmon$# v_location := 2300;
gpperfmon$# EXECUTE v_sql;
gpperfmon$#
gpperfmon$# v_location := 3000;
gpperfmon$# v_sql := 'CREATE EXTERNAL WEB TABLE db_files ' ||
gpperfmon$# '(segment_id int, relfilenode text, filename text, ' ||
gpperfmon$# 'size numeric) ' ||
gpperfmon$# 'execute E''ls -l $GP_SEG_DATADIR/base/' || v_db_oid ||
gpperfmon$# ' | ' ||
gpperfmon$# 'grep gpadmin | ' ||
gpperfmon$# E'awk {''''print ENVIRON["GP_SEGMENT_ID"] "\\t" $9 "\\t" ' ||
gpperfmon$# 'ENVIRON["GP_SEG_DATADIR"] "/' || v_db_oid ||
gpperfmon$# E'/" $9 "\\t" $5''''}'' on all ' || 'format ''text''';
gpperfmon$#
gpperfmon$# v_location := 3100;
gpperfmon$# EXECUTE v_sql;
gpperfmon$#
gpperfmon$# v_location := 4000;
gpperfmon$# SELECT count(*) INTO v_num_segments
gpperfmon$# FROM gp_segment_configuration
gpperfmon$# WHERE preferred_role = 'p'
gpperfmon$# AND content >= 0;
gpperfmon$#
gpperfmon$# v_location := 4100;
gpperfmon$# v_skew_amount := 1.2*(1/v_num_segments);
gpperfmon$#
gpperfmon$# v_location := 4200;
gpperfmon$# v_sql := 'CREATE OR REPLACE VIEW vw_file_skew AS ' ||
gpperfmon$# 'SELECT schema_name, ' ||
gpperfmon$# 'table_name, ' ||
gpperfmon$# 'max(size)/sum(size) as largest_segment_percentage, ' ||
gpperfmon$# 'sum(size) as total_size ' ||
gpperfmon$# 'FROM ( ' ||
gpperfmon$# 'SELECT n.nspname AS schema_name, ' ||
gpperfmon$# ' c.relname AS table_name, ' ||
gpperfmon$# ' sum(db.size) as size ' ||
gpperfmon$# ' FROM db_files db ' ||
gpperfmon$# ' JOIN pg_class c ON ' ||
gpperfmon$# ' split_part(db.relfilenode, ''.'', 1) = c.relfilenode ' ||
gpperfmon$# ' JOIN pg_namespace n ON c.relnamespace = n.oid ' ||
gpperfmon$# ' WHERE c.relkind = ''r'' ' ||
gpperfmon$# ' GROUP BY n.nspname, c.relname, db.segment_id ' ||
gpperfmon$# ') as sub ' ||
gpperfmon$# 'GROUP BY schema_name, table_name ' ||
gpperfmon$# 'HAVING sum(size) > 0 and max(size)/sum(size) > ' ||
gpperfmon$# v_skew_amount::text || ' ' ||
gpperfmon$# 'ORDER BY largest_segment_percentage DESC, schema_name, ' ||
gpperfmon$# 'table_name';
gpperfmon$#
gpperfmon$# v_location := 4300;
gpperfmon$# EXECUTE v_sql;
gpperfmon$#
gpperfmon$# EXCEPTION
gpperfmon$# WHEN OTHERS THEN
gpperfmon$# RAISE EXCEPTION '(%:%:%)', v_function_name, v_location, sqlerrm;
gpperfmon$# END;
gpperfmon$# $
gpperfmon-# language plpgsql;
CREATE FUNCTION
gpperfmon=# SELECT fn_create_db_files();
NOTICE: view "vw_file_skew" does not exist, skipping
CONTEXT: SQL statement "DROP VIEW IF EXISTS vw_file_skew"
PL/pgSQL function "fn_create_db_files" line 18 at execute statement
NOTICE: table "db_files" does not exist, skipping
CONTEXT: SQL statement "DROP EXTERNAL TABLE IF EXISTS db_files"
PL/pgSQL function "fn_create_db_files" line 24 at execute statement
fn_create_db_files
--------------------
(1 row)
gpperfmon=# SELECT * FROM vw_file_skew ORDER BY 3 DESC;
schema_name | table_name | largest_segment_percentage | total_size
-------------+------------+----------------------------+------------
(0 rows)
gpperfmon=#