collect.sh system

Here is a revised version of the old collect.sh.
This version is purely for data collection in raw format.
I am still working on formatting scripts for the raw data.


#!/bin/ksh  

# Print the command-line synopsis on stdout and terminate the script.
# Never returns to the caller: exits with status 2 (usage error).
function usage
{
       echo "Usage: $(basename "$0") <username> <password> <host> [sid] [port]"
       echo "  username        database username"
       echo "  password        database password"
       echo "  host            hostname or IP address"
       echo "  sid             optional database sid (default: orcl)"
       echo "  port            optional database port (default: 1521)"
       exit 2
}

# Validate the argument count.
# "collect.sh end" is a control command handled further down (it removes the
# *.end sentinel files so running collectors stop), so it is exempt from the
# three-argument minimum; otherwise it could never be reached with one argument.
[[ $1 != end && $# -lt 3 ]] && usage
[[ $# -gt 5 ]] && usage

# Required connection parameters.
UN=$1
PW=$2
HOST=$3
# Optional parameters and their defaults.
SID=orcl
PORT=1521

[[ $# -gt 3 ]] && SID=$4
[[ $# -gt 4 ]] && PORT=$5

    #FILE="soe01.dbf"

    FILE=""
    COLLECT="wts fio"     # list of stats to sample
    COLLECT="fio_all fio wts systat"
    COLLECT="fio wts systat"
    COLLECT="wts systat"

    FILE_FILTER=""
    # set FILE_FILTER to comment to avoid the file filter
    if  test x$FILE = x; then
       FILE_FILTER="-- "
    fi

    # seems to sample at sample rate +3 secs, ie 7 actual works out to 10 secs
    SAMPLE_RATE=2            # rate at which to collect
    RUN_TIME=${2:-43200}     # total run time, 12 hours default 43200
    FAST_SAMPLE="$COLLECT"   # list of stats to sample
    DEBUG=${DEBUG:-0}        # 1 output debug, 2 include SQLplus output
    MON_LOG=${MON_LOG:-$HOME/LOG}


    MACHINE=`uname -a | awk '{print $1}'`
    case $MACHINE  in
    AIX)
            MKNOD=/usr/sbin/mknod
            ;;
    SunOS)
            MKNOD=/etc/mknod
            ;;
    HP-UX)
            MKNOD=mknod
            ;;
    *)
            MKNOD=mknod
            ;;
    esac

  # create OUPUT directory
    if [ ! -f "$MON_LOG" ]; then
       mkdir $MON_LOG > /dev/null 2>&1
    fi

  # looks like "collect.sh end" will end all running collect.sh's
    if test x$1 = xend ; then
       if [ -f $MON_LOG/*\.end ]; then
           rm $MON_LOG/*\.end
       fi
       if [ -f $MON_LOG/*/*\.end ]; then
           rm $MON_LOG/*/*\.end
       fi
       exit
    fi

  # setup OUTPUT file name template
    CURR_DATE=`date "+%d%m_%H%M%S"`  
    MON_NODE="`hostname | sed -e 's/\..*//'`"
    MON_LOG=${MON_LOG}/$CURR_DATE
    mkdir $MON_LOG > /dev/null 2>&1
    PRE=${MON_LOG}/${MON_NODE}_${CURR_DATE}_
    SUF=.dat
    TMP="$MON_LOG/"
    OUTPUT=${TMP}connect.out
    OUTPUTLOG=${TMP}connect.log
    CLEANUP=${TMP}cleanup.sh
    echo "SYS=$SYS"  >  $OUTPUTLOG

  # exit if removed
    EXIT=${PRE}collect.end
    touch $EXIT

  # printout setup
    for i in 1; do
    echo
    echo
    echo "SYS=$SYS"
    echo "RUN_TIME=$RUN_TIME"
    echo "SAMPLE_RATE=$SAMPLE_RATE"
    echo "FAST_SAMPLE=$FAST_SAMPLE"
    echo "HOST=$HOST"
    echo "DEBUG=$DEBUG"
    echo
    done > $OUTPUTLOG
    cat $OUTPUTLOG

  # create a UNIX named pipe
  # in order to avoid disconnects when attaching sqlplus to the named pipe
  # create an empty file and "tail -f" this empty file into the pipe
  # this will prevent the pipe from closing on the sqlplus session
  # otherwise the sqlplus session would exit after every cat to the pipe
  # had finished

  # setup sqlplus connection reading off a pipe
    OPEN=${MON_LOG}/collect.open
    PIPE=${MON_LOG}/collect.pipe
    rm $OPEN $PIPE > /dev/null 2>&1
    touch  $OPEN
    cmd="$MKNOD $PIPE p"
    eval $cmd
    tail -f $OPEN >> $PIPE &
    OPENID="$!"




  # run SQLPLUS silent unless DEBUG is 2 or higher
  # The session is started in the background with stdin attached to $PIPE, so
  # every SQL snippet appended to the pipe later runs in this one connection.
  # stdout is discarded; results come back through "spool" files instead.
  # The connect string is a full descriptor built from HOST, PORT and SID
  # (SID is passed as SERVICE_NAME).
  # NOTE(review): $UN/$PW are part of the sqlplus command line, which other
  # local users can probably see in process listings -- confirm this is acceptable.
       SILENT=""
    if [ $DEBUG -lt 2 ]; then
       SILENT="-s"
    fi
    sqlplus $SILENT "$UN/$PW@\
                  (DESCRIPTION=\
                     (ADDRESS_LIST=\
                        (ADDRESS=\
                           (PROTOCOL=TCP)\
                           (HOST=$HOST)\
                           (PORT=$PORT)))\
                     (CONNECT_DATA=\
                        (SERVER=DEDICATED)\
                        (SERVICE_NAME=$SID)))" < $PIPE > /dev/null &
  # PID of the background sqlplus, killed by the cleanup script
    SQLID="$!"

  # setup exit/cleanup stuff
  # Generate $CLEANUP, a small script that prints the date, then removes the
  # pipe, the tail source file and the exit sentinel and kills the background
  # sqlplus and tail processes; output of the rm and kill is discarded.
  # The variables are expanded now, so the generated script contains literal
  # paths and PIDs.
  # NOTE(review): VMSTATID is not assigned anywhere in the visible script, so
  # it expands to nothing here.
    for i in 1; do
      echo "date"
      echo "("
      echo "rm $PIPE $OPEN $EXIT"  
      echo "kill -9 $SQLID $OPENID $VMSTATID"
      echo ") > /dev/null 2>&1"
    done > $CLEANUP
    chmod 755 $CLEANUP
  # Run the cleanup script when the shell exits (0) or on signals 3, 5, 9 and 15.
  # NOTE(review): signal 9 (KILL) normally cannot be caught, and the handler
  # does not itself call exit -- confirm the intended signal list.
    trap "echo $CLEANUP;sh $CLEANUP" 0 3 5 9 15

    if [ ! -p $PIPE ]; then
       echo "error creating named pipe "
       echo "command was:"
       echo "             $cmd"
       eval $CMD
       exit
    fi

#   /******************************/
#   *                             *
#   * BEGIN FUNCTION DEFINITIONS  *
#   *                             *
#   /******************************/
#

# Print debugging information on stderr when DEBUG is 1 or higher.
#   debug var NAME...   prints "       :NAME:value:" for each named shell variable
#   debug WORD...       prints "       :WORD:" for each argument
# Nothing is printed when DEBUG is 0.  Always returns 0, so a call never
# changes the status seen by the surrounding code (the earlier counting loop
# ended on a "let" that evaluated to 0 and therefore returned status 1).
function debug {
   typeset name val

   [ ${DEBUG:-0} -ge 1 ] || return 0

   if test x"$1" = xvar; then
      # "var" mode: the remaining arguments are variable names to dereference
      shift
      for name in "$@"; do
         eval val='$'{$name}
         echo "       :$name:$val:"  1>&2
      done
   else
      # plain mode: echo every argument on its own line
      for name in "$@"; do
         echo "       :$name:"  1>&2
      done
   fi
   return 0
}

# Stop the collector once its sentinel file ($EXIT) has disappeared.
# While the file exists this is a no-op; otherwise the cleanup script is
# shown, executed, and the whole script exits.
function check_exit {
        if [ -f $EXIT ]; then
           return 0
        fi
        echo "exit file removed, exiting at $(date)"
        cat $CLEANUP
        $CLEANUP
        exit
}

# Queue a probe in the sqlplus pipe: "select 1 from dual" with its result
# spooled to $OUTPUT (headers and feedback are switched off first).
# The write to $PIPE is started in the background, so the function returns
# without waiting for it.
function sqloutput  {
    cat << EOF >> $PIPE &
       set pagesize 0
       set feedback off
       spool $OUTPUT
       select 1 from dual;
       spool off
EOF
}

# Check that the sqlplus session behind the pipe is answering.
# Queues a probe query (sqloutput) and polls $OUTPUT until a line starting
# with "1" appears.  At most 10 polls are made before the first successful
# connect (CONNECTED=0) and 600 afterwards; the pause after each poll grows by
# one second (0, 1, 2, ...).
# Success is decided by the probe result itself, not by the poll counter, so
# an answer found on the very last poll is not reported as a timeout, and a
# grep error is not mistaken for an answer.
# Globals read:    CONNECTED OUTPUT OUTPUTLOG LOOPS_DONE
# Globals written: limit count found TO_SLEEP
# On timeout the diagnostics are printed and the script exits with status 1.
function testconnect {
     rm $OUTPUT 2> /dev/null
     if [ $CONNECTED -eq 0 ]; then
        limit=10
     else
        limit=600
     fi
     debug "before sqloutput"
     sqloutput
     debug "after sqloutput"
     count=0
     found=1
     debug "before while"
     while [ $count -lt $limit ] && [ $found -ne 0 ]; do
        if [ -f $OUTPUT ]; then
          grep '^ *1'  $OUTPUT > /dev/null  2>&1
          found=$?
        else
          debug  "sql output file: $OUTPUT, not found"
        fi
        debug "found $found"
        debug "loop#  $LOOPS_DONE  count $count limit $limit "
        if [ $CONNECTED -eq 0 ]; then
           echo "Trying to connect"
        fi
        let TO_SLEEP=TO_SLEEP-count
        sleep $count
        let count=count+1
        check_exit
     done
     debug "after while"
     if [ $found -ne 0 ]; then
       echo "timeout waiting connection to sqlplus"
       echo "output from sqlplus: "
       if [ -f $OUTPUT ]; then
          cat $OUTPUT
       else
          echo "sqlplus output file: $OUTPUT, not found"
          echo "check user name and password for sqlplus"
          echo "try 'export DEBUG=1' and rerun"
       fi
       echo "loop# " $LOOPS_DONE " count $count limit $limit " >> $OUTPUTLOG
       exit 1
     fi
     echo "loop# " $LOOPS_DONE " count $count limit $limit " >> $OUTPUTLOG
}

 
# wts: print the SQL*Plus commands that sample wait-event counters.
# The query spools to ${TMP}wts.tmp one comma-separated row per listed event
# from v$system_event: the literal tag 'waitstat', total_waits,
# time_waited_micro and the event name with spaces replaced by underscores.
# The text goes to stdout; the caller redirects it into the sqlplus pipe.
function wts  {
     cat << EOF
     spool  ${TMP}wts.tmp
     Select 'waitstat'       ||','||
            total_waits      ||','||
            time_waited_micro||','||
            replace(event,' ','_')
     from v\$system_event
      where event in  (
          'db file sequential read',     -- single
          'db file parallel read',       -- multi 2-128 ?
          'db file scattered read',      -- multi 2-128 blocks ?
          'direct path read',            -- multi 1-128 blocks (8K-1M)
          'direct path read temp',       -- multi 1-128 ?? smaller
          'control file sequential read',-- multi 1-64 (blocks?)
          'log file sequential read',    -- multi 512 bytes - 4M
          'log file sync',               -- write
          'log file parallel write'      -- write
           ) ;
     spool off
EOF
}

# 3213517201 control file sequential read
#  549236675 log file sequential read
# 3999721902 log file parallel write
# 1328744198 log file sync
# 2652584166 db file sequential read
#  506183215 db file scattered read
#  834992820 db file parallel read
# 3926164927 direct path read
#  861319509 direct path read temp

#OTHER READ
#         'LGWR sequential i/o',         -- ??
#         'LGWR random i/o',             -- ??
#WRITES
# Data file init write
# control file single write
# control file parallel write
# local write wait
# log file single write
# log file parallel write
# db file single write
# db file parallel write
# direct path write
# direct path write temp

# systat: print the SQL*Plus commands that sample physical-read statistics.
# The query spools to ${TMP}systat.tmp one comma-separated row per selected
# stat_id from v$sysstat: the literal tag 'systat', the statistic name with
# spaces replaced by underscores, its value and the stat_id.
# The text goes to stdout; the caller redirects it into the sqlplus pipe.
function systat  {
     cat << EOF
     spool  ${TMP}systat.tmp
     Select 'systat'  ||','||
            replace(name,' ','_') ||','||
             value   ||','||
         stat_id    
       from v\$sysstat fs
       where stat_id in (
          2572010804, -- physical read total bytes
          789768877,  -- physical read IO requests            
          523531786, --physical read bytes                     
          2007302071  -- physical read total multi block requests  
       );
     spool off
EOF
}

#        3343375620, -- physical read total IO requests
#  3343375620, -- physical read total IO requests     
#  2263124246, -- physical reads                           
#  4171507801, --   physical reads cache                  
#  2589616721 --   physical reads direct                 
#  2572010804, -- physical read total bytes            
# physical read total IO requests                                  
#    physical read IO requests                                   
#      "single block reads" =
#         = physical read IO requests - physical read total multi block requests
#     physical read total multi block requests                  

# physical read total bytes                                    
#    physical read bytes                                      

# physical reads                                             
#   physical reads cache                                   
#   physical reads direct                                 

# aas: print the SQL*Plus commands that sample session activity.
# The query spools to ${TMP}aas.tmp a single row with four columns (TOTAL,
# CPU, IO, WAIT) computed from v$active_session_history rows of FOREGROUND
# sessions sampled in the last 15 seconds; each count is divided by 15 and
# rounded to two decimals.  TOTAL counts every row, IO counts WAITING rows in
# wait class 'User I/O', WAIT counts the remaining WAITING rows.
# Unlike the other samplers the columns are not joined with commas and carry
# no leading tag.  This sampler is not part of the COLLECT list set above.
function aas  {
     cat << EOF
     spool  ${TMP}aas.tmp
       select
            round( sum(decode(session_state,'ON CPU',1,1))/15,2)     "TOTAL",
            round( sum(decode(session_state,'ON CPU',1,0))/15,2)     "CPU",
            round( sum(decode(session_state,'WAITING', decode(wait_class, 'User I/O',1,0),0))/15,2)    "IO" ,
            round((sum(decode(session_state,'WAITING', 1,0))    -
                   sum(decode(session_state,'WAITING', decode(wait_class, 'User I/O',1,0),0))
                  )/15,2)    "WAIT"
       from v\$active_session_history ash
       where
        SAMPLE_TIME > sysdate - (15/(24*60*60)) and
            SESSION_TYPE = 'FOREGROUND'
       ;
     spool off
EOF
}

# fio_all: print the SQL*Plus commands that sample read statistics per datafile.
# The query spools to ${TMP}fio_all.tmp one comma-separated row per datafile
# (v$filestat joined to v$datafile on file#): the literal tag 'filestat',
# phyrds (reads), PHYBLKRD (blocks read), readtim (read time) and the file name.
function fio_all  {
     cat << EOF
     spool  ${TMP}fio_all.tmp
     Select 'filestat'  ||','||
             phyrds     ||','||
             PHYBLKRD   ||','||
         readtim    ||','||
             name
       from v\$filestat fs ,v\$datafile df
       where fs.file# = df.file#;
     spool off
EOF
}

# fio: print the SQL*Plus commands that sample summed read statistics.
# The query spools to ${TMP}fio.tmp a single comma-separated row from
# v$filestat: the literal tag 'filestat' and the sums of phyrds (reads),
# PHYBLKRD (blocks read) and readtim (read time).
# $FILE_FILTER is expanded in front of the where clause: when it holds "-- "
# the clause becomes an SQL comment and all files are summed; when it is
# empty only the file whose number is in $file_number is counted.
function fio  {
     cat << EOF
     spool  ${TMP}fio.tmp
     Select 'filestat'     ||','||
            sum(phyrds)    ||','||
            sum(PHYBLKRD)  ||','||
        sum(readtim)    
       from v\$filestat
        $FILE_FILTER where file#=$file_number
        ;
     spool off
EOF
}

# filenames: print the SQL*Plus commands that list every datafile.
# The query spools "file#,name" rows from v$datafile to ${TMP}filenames.tmp.
# NOTE(review): no call to this function is visible in this script.
function filenames  {
     cat << EOF
     spool  ${TMP}filenames.tmp
     Select file#||','||name from v\$datafile;
     spool off
EOF
}
# filen: print the SQL*Plus commands that look up the file# of $FILE.
# The query spools to ${TMP}filen.tmp the file# of every datafile whose name
# ends with $FILE, wrapped as FSTART:<file#>:FEND so the number can be cut
# out of the spool file with sed afterwards.
function filen  {
     cat << EOF
     spool  ${TMP}filen.tmp
     Select 'FSTART:'||file#||':FEND' from v\$datafile where name like '%$FILE';
     spool off
EOF
}

# Main sampling loop.
# Each pass sends the SQL of every sampler named in FAST_SAMPLE to the sqlplus
# pipe, waits for sqlplus to catch up (testconnect), then appends every spool
# file to its data file with the current epoch time stamp prefixed to each
# line.  The pass ends with a SAMPLE_RATE second sleep.
# The loop stops when the accumulated sleep time (SLEPTED) reaches RUN_TIME or
# when the exit sentinel file has been removed.
function tight_loop {
     check_exit
     SLEPTED=0
     debug var SLEPTED SAMPLE_RATE
     while [ $SLEPTED -lt $RUN_TIME ] && [ -f $EXIT ]; do
        # queue the sampling queries
        for sampler in $FAST_SAMPLE; do
           $sampler >> $PIPE
        done
        # the probe query runs after the samplers, so once it has answered
        # the spool files above are complete
        testconnect
        stamp=$(date '+%s')
        for sampler in $FAST_SAMPLE; do
           sed -e "s/^/$stamp,/" ${TMP}${sampler}.tmp >> ${PRE}${sampler}$SUF
        done
        check_exit
        sleep $SAMPLE_RATE
        debug "sleeping $SAMPLE_RATE"
        let SLEPTED=SLEPTED+SAMPLE_RATE
        debug var SLEPTED
     done
}



# setup_sql: print the SQL*Plus session settings sent before sampling starts.
# Switches off paging, headings, feedback and substitution verification, trims
# trailing blanks and declares a few "new_value" columns; start_day is filled
# with the current Julian day.
# NOTE(review): linesize is set twice (2500, then 100); presumably the later
# value wins -- confirm that 100 is wide enough for the longest spooled row.
# NOTE(review): none of the new_value variables is referenced by the other
# queries visible in this script.
function setup_sql {
  cat << EOF
  set echo on
  set pause off
  set linesize 2500
  set verify off
  set feedback off
  set heading off
  set pagesize 0
  set linesize 100
  set trims on
  set trim on
  column start_day    new_value start_day
  select  to_char(sysdate,'J')     start_day  from dual;
  column pt           new_value pt
  column seq          new_value seq
  column curr_time    new_value curr_time
  column elapsed      new_value elapsed     
  column timer        new_value timer       
  set echo off
EOF
}
#  alter session set sql_trace=false;
#  REM drop sequence orastat;
#  REM create sequence orastat;


#   /******************************/
#   *                             *
#   *   END FUNCTION DEFINITIONS  *
#   *                             *
#   /******************************/



#   /******************************/
#   *                             *
#   *      BEGIN PROGRAM          *
#   *                             *
#   /******************************/


  # Bookkeeping counters.
  # NOTE(review): SLOW_RATE is not assigned anywhere in the visible script, so
  # TO_SLEEP starts out empty; CURRENT is not read anywhere visible.
  LOOPS_DONE=0
  CURRENT=0
  TO_SLEEP=$SLOW_RATE

  # First handshake: send the session settings, then wait until sqlplus
  # answers the probe query (testconnect uses its short limit while CONNECTED=0).
  CONNECTED=0
  setup_sql >> $PIPE
  testconnect
  echo "Connected, starting collect at `date`"
  # From here on testconnect uses its long limit; the settings are sent again.
  CONNECTED=1
  setup_sql >> $PIPE

echo "starting stats collecting "
# BEGIN COLLECT LOOP
        
       echo "cleanup script: $CLEANUP "
       echo "get output with: (cd $MON_LOG; fio_all.sh )"
       # (sleep 5; cd $MON_LOG; fio_all.sh ) &
       # OUTPUT_SID=$!
       # echo "kill -9 $OUTPUT_SID > /dev/null 2>&1 " >> $CLEANUP
       # Only when FILE names a datafile: ask the database for its file# and
       # keep it in file_number for the fio sampler.
       # NOTE(review): fio is expanded into ${PRE}_toto before file_number is
       # assigned on the following line -- looks like a debugging leftover.
       if  test x$FILE != x; then
         filen >> $PIPE
         testconnect
         fio > ${PRE}_toto
         # get file# to collect io stats
         file_number=`cat ${TMP}filen.tmp | grep 'FSTART:' | sed -e 's/.*FSTART://' | sed -e 's/:FEND.*//'`
       fi
       # either sum all the files, or just get one (don't save the name in output)
       # sudo nfsf.d  $DTRACE_HOST $FILE >> ${PRE}_dtrc${SUF} &
       # get all file names
       #            ticks  addr  file
       #sudo nfsm.d  `expr $SAMPLE_RATE + 3 ` $HOST >> ${PRE}_dtrc${SUF} &
       #sudo nfst.d  `expr $SAMPLE_RATE + 3 ` $HOST >> ${PRE}_dtrc${SUF} &
       #sudo nfs.d  `expr $SAMPLE_RATE + 3 ` $HOST >> ${PRE}_dtrc${SUF} &
       #sudo nfsz.d  `expr $SAMPLE_RATE + 3 ` $HOST >> ${PRE}_dtrc${SUF} &
       #DTRACEID="$!"
       #echo "sudo kill -9 $DTRACEID > /dev/null 2>&1 " >> $CLEANUP
       # sample until RUN_TIME is used up or the exit sentinel is removed
       tight_loop
# END COLLECT LOOP

# CLEANUP
   # Normal end of run: show the cleanup script, then execute it.
   # The trap installed on shell exit will run it once more when the script ends.
   echo "run time expired, exiting at `date`"
   cat $CLEANUP
   $CLEANUP


Comments