DB2 SQL
 
SOME SQL HINTS

MORE SQL: TEMP.TABLES, WHERE, ORDER BY, ALIASES, SUBQUERIES, COLUMN & SCALAR FUNCTIONS, GROUP BY, HAVING, UNION, EXCEPT, INTERSECT
 

CREATE TABLE TAB1 ( ... )  IN USERSPACE1;
ALTER TABLE TAB1 ADD PRIMARY KEY (...)
DROP TABLE TAB1

 
DECLARE GLOBAL TEMPORARY TABLE TEMP1
LIKE TAB1
ON COMMIT PRESERVE ROWS
NOT LOGGED
IN MYTEMPSPACE

WHERE CLAUSE:

X = Y, X <> Y, X < Y, X > Y, X <= Y, X >= Y, 
IS NULL, IS NOT NULL
IN, NOT IN
BETWEEN, NOT BETWEEN
LIKE, NOT LIKE
EXISTS, NOT EXISTS

SELECT NAME, SALARY FROM STUFF WHERE SALARY > 20000

... WHERE YEARS IN (1,2,3)
... WHERE YEARS NOT BETWEEN 3 AND 8
... WHERE NAME LIKE 'S_____'
... WHERE NAME LIKE 'S%'
... ORDER BY SALARY DESC

ALIASSES CAN BE USED IN THE "ORDER BY":
BAD NEWS IS THAT THE NAME ALIASES FOR FIELDS (SIMPLE OR CALCULATED) CAN NOT BE USED IN THE "WHERE" CLAUSE, BECAUSE THE "WHERE" CLAUSE IS PROCESSED BEFORE THE ALIAS NAME IS APPLIED TO THE RESULT.  SO YOU MAY NEED TO REPEAT THE WHOLE EXPRESSION IN THE WHERE CLAUSE.
BUT GOOD NEWS IS THAT THE NAME ALIASES CAN BE USED IN THE "ORDER BY" CLAUSE.
 

SELECT  VALUE(X.PARAM1, X.PARAM2) MYPARAM
FROM TEST1 X
WHERE (X.PARAM1, X.PARAM2) >= 40
ORDER BY MYPARAM;

SUBQUERY:

SELECT LNAME 
FROM EMPLOYEE 
WHERE LNAME IN (SELECT SPERSON FROM SALES WHERE SALES_DATE < '01/01/1996')

COLUMN FUNCTIONS:

SUM, AVG, MIN, MAX, COUNT
NOTE: DON'T TRY TO SPECIFY COLUMN FUNCTIONS IN A WHERE CLAUSE - THEY WILL NOT WORK, BECAUSE WHERE CLAUSE IS EVALUATED BEFORE THE SELECT CLAUSE. 

SELECT NAME, SALARY 
FROM EMPLOYEE 
WHERE SALARY > (SELECT AVG(SALARY) FROM EMPLOYEE) 
ORDER BY SALARY DESC

SCALAR FUNCTIONS:

ABS, HEX, LENGTH, YEAR, MONTH, DAY, LCASE OR LOWER, UCASE OR UPPER

SELECT MIN(LENGTH(DEPTNAME) AS MIN, MAX(LENGTH(DEPTNAME)) AS MAX FROM DEPT

GROUP BY  &  HAVING:

SELECT SALES_DATE, MAX(SALES) AS MAX_SALES
FROM SALES
GROUP BY SALES_DATE WHERE MAX(SALES) > 25

UNION, EXCEPT, INTERSECT:

SELECT SALES_PERSON FROM SALES WHERE REGION = 'REG1'
UNION
SELECT SALES_PERSON FROM SALES WHERE SALES > 3

NOTE: UNION PROCESSES BOTH QUERIES ELIMINATES DUPLICATES BETWEEN THEM, AND RETURNS THE FINAL COMBINED RESULT SET

SELECT SALES_PERSON FROM SALES WHERE REGION = 'REG1'
EXCEPT
SELECT SALES_PERSON FROM SALES WHERE SALES > 3

SELECT SALES_PERSON FROM SALES WHERE REGION = 'REG1'
INTERSECT
SELECT SALES_PERSON FROM SALES WHERE SALES > 3

INSERT, UPDATE, DELETE:

INSERT INTO TAB1 VALUES (123,'SOMETHING',....)
INSERT INTO TAB1 SELECT ... FROM ... WHERE ..

UPDATE TAB1 SET (A,B) = (123,'SOMETHING') WHERE ...

DELETE FROM TAB1 WHERE ...


 

SQL_2 - RECURSIVE QUERIES, COMBINED OUTER JOINS

SELECT COL1, CAST(ROUND(AVG(COL2),2) AS DECIMAL(9,2)) AS AVER_COL2
FROM TAB1
GROUP BY COL1
ORDER BY COL1
 

RECURSIVE QUERY:

WITH EXP1 (COL1, COL2, COL3) AS 
(
-------- SELECT TOP NODES
    SELECT ... FROM TAB1 WHERE ... 

UNION 

-------- SELECT OTHER NODES RECURSIVELY
    SELECT ... FROM TAB1, EXP1 WHERE ...

)
SELECT ... FROM EXP1
 

HERE IS AN EXAMPLE:

WITH EXP1 (ROW_ID, PAR_POSTN_ID, X_POSITION_NAME, PARENT_NAME) 
AS (

  SELECT 
    PP.ROW_ID, PP.PAR_POSTN_ID, 
    PP.X_POSITION_NAME, '' PARENT_NAME
  FROM INST1.S_POSTN PP
  WHERE PP.PAR_POSTN_ID IS NULL

  UNION ALL

  SELECT 
    TT.ROW_ID, TT.PAR_POSTN_ID, 
    TT.X_POSITION_NAME, EE.X_POSITION_NAME PARENT_NAME 
  FROM INST1.S_POSTN TT, EXP1 EE 
  WHERE EE.ROW_ID = TT.PAR_POSTN_ID

   ) 

SELECT 
  P.PARENT_NAME, E.LAST_NAME, E.FST_NAME, 
  E.LOGIN, E.PAGER_PIN AS REP_ID, P.X_POSITION_NAME
FROM 
  INST1.S_EMPLOYEE E, 
  INST1.S_EMP_POSTN EP, 
  EXP1 P
WHERE E.ROW_ID = EP.EMP_ID
  AND EP.POSITION_ID = P.ROW_ID
ORDER BY P.PARENT_NAME, E.LAST_NAME, E.FST_NAME

COMBINED OUTER JOINS:
SELECT EMPNO, DEPTNO, PROJNAME
FROM ( EMPLOYEE LEFT OUTER JOIN PROJECT ON RESPEMP=EMPNO )
  LEFT OUTER JOIN DEPARTMENT ON MGRNO = EMPNO

 

SQL OLAP

- OLAP FUNCTIONS - OLAP FUNCTIONS - FROM SQL REFERENCE
- HTTP://WWW7B.SOFTWARE.IBM.COM/DMDD/LIBRARY/TECHARTICLE/LYLE/0110LYLE.HTML - GOOD REVIEW OF OLAP FUNCTIONS

- HTTP://WWW.ALMADEN.IBM.COM/CS/PEOPLE/PETERH/B15.PDF - PETER J. HAAS DB2 UDB ADVANCED ANALYTICS FOR BUSINESS INTELLIGENCE
- HTTP://WWW.ALMADEN.IBM.COM/CS/PEOPLE/PETERH/B16.PDF - PETER J. HAAS SPEEDING UP DB2 UDB USING SAMPLING

- HTTP://WWW.IBM.COM/SEARCH?EN=UTF&V=11&LANG=EN&CC=US&LV=C&Q=DB2+OLAP+FUNCTION+REFERENCE - SEARCH IBM SITE

STAR SCHEMA GROUPING:
SELECT EMPNO, FIRSTNAME, LASTNAME, SUM(SALES) AS TOT_SALES, (SALARY + BONUS + COMM) AS COMPENSATION
FROM EMPLOYEE, SALES
WHERE SEX='M' AND YEAR(SALES_DATE) = 1996 AND LASTNAME = SALES_PERSON
GROUP BY EMPNO, FIRSTNAME, LASTNAME, (SALARY + BONUS + COMM)

ROLLUP GROUPING:
SELECT YEAR(SALES_DATE) AS YEAR, COUNT(*) AS TOT_SALES
FROM SALES
GROUP BY ROLLUP (YEAR(SALES_DATE))

YEAR      TOT_SALES
-----         -------------
 -             41
 1995        5
 1996        36

SELECT YEAR(SALES_DATE) AS YEAR, REGION, COUNT(*) AS TOT_SALES
FROM SALES
GROUP BY ROLLUP (YEAR(SALES_DATE), REGION)

SELECT YEAR(SALES_DATE) AS YEAR, REGION, SALES_PERSON, COUNT(*) AS TOT_SALES
FROM SALES
GROUP BY ROLLUP (YEAR(SALES_DATE), REGION, SALES_PERSON)

CUBE GROUPING:
SELECT YEAR(SALES_DATE) AS YEAR, REGION, COUNT(*) AS TOT_SALES
FROM SALES
GROUP BY CUBE (YEAR(SALES_DATE), REGION)

SELECT YEAR(SALES_DATE) AS YEAR, REGION, SALES_PERSON, COUNT(*) AS TOT_SALES
FROM SALES
GROUP BY CUBE (YEAR(SALES_DATE), REGION, SALES_PERSON)
 

OTHER OLAP FEATURES:
SOME USEFUL FUNCTIONS: SUM, COUNT,AVERAGE, STDDEV, CORR, REGR_* ROWNUMBER, RANK, WINDOWS AGGREGATES

SIMPLE STATISTICS:
SELECT COUNTRY, YEAR, COUNT(*) AS COUNT, SUM(AMOUNT) AS SUM, AVG(AMOUNT) AS AVG, MAX(AMOUNT) AS MAX, STDDEV(AMOUNT) AS STDDEV
FROM MYTABLE
GROUP BY COUNTRY, YEAR

DETECT TRANSACTIONS WHICH ARE LARGER THAN USUAL:

CREATE VIEW PROFILE(CUST_ID, AVG_AMT, SD_AMT) AS 
SELECT CUST_ID, AVG(CHARGE_AMT), STDDEV(CHARGE_AMT) FROM TRANS
WHERE DATE BETWEEN '2002-0101' AND '2002-03-31'
GROUP BY CUST_ID

CREATE TRIGGER BIG_CHRG
AFTER INSERT ON TRANS
REFERENCING NEW AS NEWROW FOR EACH ROW MODE DB2SQL
WHEN (NEWROW.CHARGE_AMT > (SELECT AVG_AMT + 2.0 * SD_AMT FROM PROFILE WHERE PROFILE.CUST_ID = NEWROW.CUST_ID))
INSERT INTO BIG_CHARGES (CUST_ID,CHARGE_AMT)
VALUES(NEWROW.CUST_ID, NEWROW.CHARGE_AMT))

EQUI-WIDTH HISTOGRAM:
WITH DT AS (
  SELECT
    T.TRANSID,
    SUM(AMOUNT) AS TRANS_AMT,
    CASE
        WHEN SUM(AMOUNT)/3000 < 0 THEN 0
        WHEN SUM(AMOUNT)/3000 >19 THEN 19
        ELSE INT(SUM(AMOUNT)/3000)
    END AS BUCKET
  FROM TRANS T, TRANSITEM TI
  WHERE T.TRANSID=TI.TRANSID
  GROUP BY T.TRANSID
)
SELECT BUCKET,COUNT(BUCKET) AS HEIGHT, (BUCKET+1)*3000 AS MAX_AMT
FROM DT
GROUP BY BUCKET;

EQUI-HEIGHT HISTOGRAM:
WITH DT AS (
  SELECT
    T.TRANSID,
    SUM(AMOUNT) AS TRANS_AMT,
    ROWNUMBER( ) OVER(ORDER BY SUM(AMOUNT))*10/(SELECT COUNT(DISTINCT TRANSID)+1 FROM STARS.TRANSITEM) AS BUCKET
  FROM STARS.TRANS T, STARS.TRANSITEM TI
  WHERE T.TRANSID=TI.TRANSID
  GROUP BY T.TRANSID
)
SELECT BUCKET,COUNT(BUCKET) AS B_COUNT, MAX(TRANS_AMT) AS PART_VALUE
FROM DT
GROUP BY BUCKET;

NOTE:
THERE ARE 3 RANKING FUNCTIONS: RANK(), DENSERANK(), ROWNUMBER().
 - ROW_NUMBER() & ROWNUMBER ARE SYNONYMS. ALSO DENSE_RANK & DENSERANK ARE SYNONYMS.
THE 3 RANKING FUNCTIONS ARE RANKING ROWS, THAT IS, THEY ASSIGN NUMBERS (1,2,3, ETC). IF ALL RESULTS ARE DIFFERENT - ALL 3 FUNCTIONS PRODUCE THE SAME RESULT. IF WE HAVE DUPLICATES - THEN:
- RANK() - WILL GIVE THEM THE SAME NUMBER - AND SKIP THE NEXT NUMBER(S)
- DENSERANK() - WILL GIVE THEM THE SAME NUMBER - AND NOT SKIP THE NEXT NUMBER(S)
- ROWNUMBER() - WILL NOT GIVE SAME NUMBER AND WILL NOT SKIP. WILL SIMPLY GIVE DIFFERENT NUMBERS TO EVERY ROW. THIS IS THE ONLY RANKING FUNCTION THAT DOES NOT REQUIRE AN ORDERING.
 

 SELECT EMPNUM, DEPT, SALARY, 
   RANK() OVER(PARTITION BY DEPT 
         ORDER BY SALARY DESC NULLS LAST) AS RANK, 
   DENSE_RANK() OVER (PARTITION BY DEPT ORDER BY SALARY DESC NULLS LAST)AS DENSERANK, 
   ROW_NUMBER() OVER (PARTITION BY DEPT ORDER BY SALARY DESC NULLS LAST)AS ROWNUMBER 
  FROM EMPTAB; 

   EMPNUM  DEPT SALARY  RANK  DENSERANK   ROWNUMBER 
   ------  ---- ------  ----  ---------   --------- 
   6       1    78000   1     1           1 
   2       1    75000   2     2           2 
   7       1    75000   2     2           3 
   11      1    53000   4     3           4 
   5       1    52000   5     4           5 
   1       1    50000   6     5           6 
  -------------------------------------------------- 
   9       2    51000   1     1           1 
   4       2       -    2     2           2 

OVER() - TO SPECIFY SETS. THIS INCLUDES PARTITIONNING AND ORDERING INSIDE SETS (SEE MANY EXAMPLES BELOW).
   PARTITION BY
   ORDER BY    ( ASC , DESC ,  NULLS LAST )
   ROWS
   BETWEEN ... AND ...
   N PRECEDING & N FOLLOWING
   UNBOUND PRECEDING  &  UNBOUND FOLLOWING    (TO INCLUDE THE ENTIRE PRECEDING/FOLLOWING PARTITION(S))
   CURRENT ROW
   RANGE BETWEEN UNBOUND PRECEDING AND UNBOUND FOLLOWING
   RANGE BETWEEN CURRENT ROW AND UNBOUND FOLLOWING
 

EXAMPLE: SMOOTHED TIME SERIES (OVER 1 WEEK : 3 PRECEDING DAYS AND 3 FOLLOWING DAYS):
SELECT
  DATE, SYMBOL, CLOSE_PRICE,
  AVG(CLOSE_PRICE) OVER(ORDER BY DATE ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING) AS SMOOTH_CP
FROM STOCKTAB
WHERE SYMBOL = 'IBM' AND DATE BETWEEN '1999-08-01' AND '1999-09-01';
 

ATTENTION: ADDING ORDERING TO THE SET CHANGES THE BEHAVIOUR OF CALCULATIONS TO BECOME "CUMULATIVE". THAT MEANS, CALCULATING ON THE CURRENT ROW - AND ALL ROWS IN THE SET THAT PRECEDE IT WITH RESPECT TO THE ORDERING.

SCALAR-AGGREGATE FUNCTIONS - (VER.7 AND UP) - PERFORM SCALAR CALCULATIONS ON VALUES FROM MULTIPLE ROWS WITHIN A SET TO COMPUTE THE RESULT
 
 

    SELECT DEPT, SALARY, 
           SUM(SALARY) OVER(PARTITION BY DEPT) AS DEPTSUM, 
           AVG(SALARY) OVER(PARTITION BY DEPT) AS AVGSAL, 
           COUNT(*) OVER(PARTITION BY DEPT) AS DEPTCOUNT, 
           MAX(SALARY) OVER(PARTITION BY DEPT) AS MAXSAL 
     FROM EMPTAB; 

    DEPT  SALARY  DEPTSUM  AVGSAL  DEPTCOUNT MAXSAL 
    ----- ------- -------- ------- --------- -------- 
        1   50000 383000   63833         6    78000 
        1   75000 383000   63833         6    78000 
        1   52000 383000   63833         6    78000 
        1   78000 383000   63833         6    78000 
        1   75000 383000   63833         6    78000 
        1   53000 383000   63833         6    78000 
        2       -  51000   51000         2    51000 
        2   51000  51000   51000         2    51000 
        3   79000 209000   69666         3    79000 
        3   55000 209000   69666         3    79000 
        3   75000 209000   69666         3    79000 
        -       -  84000   84000         2    84000 
        -   84000  84000   84000         2    84000

NOTE: THE QUERY ABOVE CONTAINS NO GROUP BY CLAUSE. INSTEAD, THE OVER CLAUSE IS USED TO PARTITION THE DATA SO THAT THE SUM FUNCTION IS
  COMPUTED OVER ROWS IN THE SAME DEPARTMENT, AND THE SUM OF ALL THE SALARIES IN EACH DEPARTMENT IS RETURNED FOR EACH ROW WITHIN THE DEPARTMENT.

ADDING ORDERING INTO A SET TURNS CALCULATIONS INTO CUMMULATIVE OVER THE SET:
 

SELECT 
  DATE, SALES, 
  SUM(SALES) OVER(ORDER BY DATE) AS CUME_SUM, 
  COUNT(*) OVER(ORDER BY DATE) AS SETCOUNT 
FROM SALES 
WHERE YEAR(DATE) = 2000; 

    DATE       SALES        CUME_SUM     SETCOUNT 
    ---------- ------------ ------------ --------- 
    01/01/2000    968871.12    968871.12         1 
    02/01/2000     80050.05   1048921.17         2 
    03/01/2000    757866.14   1806787.31         3 
    04/01/2000     58748.13   1865535.44         4 
    05/01/2000     40711.69   1906247.13         5 
    06/01/2000    241187.78   2147434.91         6 
    07/01/2000    954924.16   3102359.07         7 
    08/01/2000    502822.96   3605182.03         8 
    09/01/2000     97201.45   3702383.48         9 
    10/01/2000    853999.45   4556382.93        10 
    11/01/2000    358775.59   4915158.52        11 
    12/01/2000    437513.35   5352671.87        12

EXAMPLE FOR SEVERAL YEARS:
SELECT DATE, SALES, 
  SUM(SALES) OVER(PARTITION BY YEAR(DATE) 
                  ORDER BY MONTH(DATE)) AS CUME_SUM 
FROM SALES 
WHERE YEAR(DATE) >= 2000;

NOTE: THE ORDER OF EVALUATION OF A QUERY IS AS FOLLOWS:
    1.FROM CLAUSE
    2.WHERE CLAUSE
    3.GROUP BY CLAUSE
    4.HAVING CLAUSE
    5.SELECT LIST (OVER...)

EXAMPLE TAKING ADVANTAGE OF THIS ORDER:

SELECT 
  YEAR(DATE) AS YEAR, 
  SUM(SALES) AS SUM, 
  SUM(SUM(SALES)) OVER(ORDER BY YEAR(DATE)) AS CUME_SUM 
FROM SALES 
WHERE YEAR(DATE) >= 1995 
GROUP BY YEAR(DATE); 

CORRELATION: ( 1 (OR -1) - PERFECT POSITIVE (OR NEGATIVE) RELATIONSHIP ):
SELECT COUNTRY, STATE, CORRELATION(ANNUAL_PURCHASES, INCOME) AS CORRELATION
FROM MYTAB
GROUP BY COUNTRY, STATE
HAVING ABS(CORRELATION(ANNUAL_PURCHASES, INCOME)) > 0.1;

NOTE: SIMILAR FUNCTION - COVARIANCE()

SELECT A.CUSTID AS CUSTID1, B.CUSTID AS CUSTID2, CORR(A.AMOUNT, B.AMOUNT) AS CORR
FROM MYTAB A, MYTAB B
WHERE A.PRODID=B.PRODID AND A.CUSTID < B.CUSTID
BROUP BY A.CUSTID, B.CUSTID
HAVING CORR(A.AMOUNT, B.AMOUNT) >=0.5 AND COUNT(*) > 100
ORDER BY CORR DESC;
======
WITH
  DT (PROD, YEAR, SALES0, SALES1,SALES2) AS (
    SELECT
      PROD, YEAR, TOTAL_SALES,
      MAX(TOTAL_SALES) OVER(PARTITIONBY PROD ORDER BY YEAR ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING),
      MAX(TOTAL_SALES) OVER(PARTITIONBY PROD ORDER BY YEAR ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING)
    FROM MYTAB
  )
SELECT
  PROD,
  CORR(SALES0,SALES1)*100 AS "CORR1(%)",
  CORR(SALES0,SALES2)*100 AS "CORR2(%)"
FROM DT
GROUP BY PROD;

LEAST-SQUARES FIT (LINEAR REGRESSION):  Y=AX+B   FOR A SET OF NON-NULL (Y,X) VALUES:
SELECT
  REGR_COUNT(SALES, AD_BUDGET) AS NUM_CITIES,
  REGR_SLOPE(SALES, AD_BUDGET) AS A,
  REGR_ICPT(SALES, AD_BUDGET) AS B,
  REGR_R2(SALES, AD_BUDGET) AS R-SQUARED
FROM AD_CAMP;

HERE R-SQUARED IS A MEASURE OF THE QUALITY OF THE FIT (ROUGHLY, THE SQUARE OF THE CORR OF X AND Y)

IF YOU NEED Y = AX*X + B - USE REGR_SLOPE(Y,X*X)
ETC.

NOTE: FOR NON-LINEAR CURVES YOU CAN NOT COMPUTE R-SQUARED LIKE THIS:
  SELECT REGR_R2(LOG(HITS), LOG(DAYS)) AS R2 FROM TRAFFIC_DATA;
BECAUSE IT WILL GIVE YOU 0.99, INSTEAD OF 0.95.
A CORRECT WAY IS TO USE THE WITH ... EXPRESSION TO:
  1. CALCULATE THE REGR_SLOPE AND REGR_ICPT FOR THE MODEL
  2. CALCULATE THE RESIDUALS (DIFFERENCES BETWEEN DATA AND THE MODEL FOR EACH POINT)
  3. CALCULATE THE R2 LIKE THIS:
     SELECT 1E0 - (SUM(ERROR*ERROR)/REGR_SYY(HITS,DAYS)) AS R2

TO FIND IF ONE OR TWO POINTS STROGLY INFLUENCE THE MODEL - WE CAN USE HAT MATRIX.
HERE IS HOW TO DO THE HAT DIAGONAL COMPUTATION:
WITH STATS(MX,MX2,SXX) AS  (
  SELECT
    REGR_AVGX(SALES,AD_BUDGET),
    REGR_AVGX(SALES,AD_BUDGET*AD_BUDGET),
    REGR_SXX(SALES,AD_BUDGET)
  FROM MYTAB
)
SELECT D.LABEL AS CITY, (S.MX2 - 2*S.MX*D.X + D.X*D.X) / S.SXX AS HAT
FROM XY_DATA D, STATS S
ORDER BY HAT DESC;

FIT THE LINE IN THE FORM Y=AX :
WE NEED TO COMPUTER A AS FOLLOWING: A = (X1*Y1 + X2*Y2 +..+XN*YN)/(X1^2 + ... + XN^2)
NOTE THAT REGR_SXX = (X1-MX)^2 + ... + (XN-MN)^2
(AND SIMILARLY REGR_SXY)
SO:
   (X1^2 + ... + XN^2) = REGR_SXX + N*MX^2
   (X1*Y1 + X2*Y2 +..+XN*YN) = REGR_SXY + N*MX*MY
THUS WE GET:

SELECT
  REGR_COUNT(KWH,HOURS_RUN) AS NUM_MACHINES,
  (REGR_SXY(KWH,HOURS_RUN) + REGR_COUNT(KWH,HOURS_RUN) * REGR_AVGX(KWH,HOURS_RUN) * REGR_AVGY(KWH,HOURS_RUN))
   /
  (REGR_SXX(KWH,HOURS_RUN) + REGR_COUNT(KWH,HOURS_RUN) * REGR_AVGX(KWH,HOURS_RUN) * REGR_AVGX(KWH,HOURS_RUN))
  AS A
FROM POWER_DATA;

EXAMPLE: WE WANT TO FIND CITIES WHERE ADD CAMPAIGN IS PARTICULARLY EFFECTIVE.
WE USE THE WITH ... CONSTRUCT AND CALCULATE A,B, AND SIGMA AS FOLLOWING:
     REGR_SLOPE() AS A
     REGR_ICPT() AS B
     SQRT((REGR_SYY() - (REGR_SXY()*REGR_SXY()/REGR_SXX()))/REGR_COUNT() - 2)) AS SIGMA

WHERE ALL REGR FUNCTIONS HAVE 2 ARGUMENTS: (SALES, AD_BUDGET)
NOW WE CAN SELECT ... WHERE SALES > A*AD_BUDGET + B + 2E0*SIGMA

SAME REGRESSION FUNCTIONS CAN ALSO BE USED TO COMPUTER OTHER STATISTICAL DATA, LIKE F-STATISTICS.

TO COMPARE 2 SETS AND VERIFY A HYPOTHESIS - PEOPLE HAVE ALWAYS USED T-TEST (WHICH PRESUMES NORMAL DISTRIBUTION).
OR MORE MODERN PROCEDURE CALLED WILCOXON RANK TEST WHICH AVOIDS ABOVE RESTRICTIONS.
IT IS CALCULATED USING THE RANK() FUNCTION ( OR DENSE_RANK() FUNCTION FOR DEALING WITH DUPLICATE TOTALS)
HERE ARE HOW RANKS ARE CALCULATED:
WITH
  RANKED_SALES(CITY,RANKS) AS (
    SELECT CITY, RANK( ) OVER(ORDER BY SALES) FROM FEB_SALES
  )
SELECT SUM(RANKS) AS W FROM RANKED_SALES WHERE CITY = 'B'

THEN ONE NEEDS TABLES FROM SOME STATISTICS BOOKS TO COMPUTE THERESULT.

----- REMOVING DUPLICATES:
CREATE VIEW AAATEMP(RN) AS SELECT ROWNUMBER() OVER(PARTITION BY COL1 || '_' || COL2) FROM MYTAB;
DELETE FROM AAATEMP WHERE RN > 1;
DROP VIEW AAATEMP;

NOTE: THE VIEW DOESN'T HAVE ANY COLUMNS OF THE ORIGINAL TABLE. STILL DELETING FROM THE VIEW DOES THE DELETION FROM THE ORIGINAL TABLE

----- FINDING DUPLICATES WITHOUT CREATING A VIEW:
WITH
  MYTEMP(NAME, DESC_TEXT, RN) AS (
    SELECT NAME, DESC_TEXT, ROWNUMBER() OVER(PARTITION BY DESC_TEXT) AS RN
    FROM S_ORG_EXT WHERE CUST_STAT_CD !='DELETED'
  )
SELECT * FROM MYTEMP WHERE RN > 1

----- DELETE 100 ROWS AT A TIME BY HAND:
CREATE VIEW AAATEMP(RN) AS SELECT ROWNUMBER() OVER() AS RN FROM MYTAB WHERE ....;
DELETE FROM AAATEMP WHERE RN < 100;

----- VERY-VERY SLOW WAY TO GO 10 AT A TIME:
SELECT A.ROW_ID FROM YOURTABLE A
WHERE 10 > (SELECT COUNT(*) FROM YOUTABLE B WHERE A.ROW_ID < B.ROW_ID)
ORDER BY ROW_ID DESC

----- PAGING THROUGH TABLE:
SELECT MYNAME
FROM (SELECT MYNAME, ROWNUMBER() OVER(ORDER BY MYNAME) AS RN FROM MYTABLE) AS TR
WHERE RN BETWEEN 10000 AND 10020

----- RANK:
SELECT NAME, DEPT, SALARY, RANK() OVER(PARTITION BY DEPT ORDER BY SALARY DESC) AS SALARY RANK
FROM STAFF
ORDER BY DEPT, SALARY_RANK

----- MOVING AVERAGE:
SELECT SALES_DATE, AVG(SALES) OVER(ORDER BY SALES_DATE ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS SMOOTHED_SALES
FROM SALES
WHERE SALES_DATE > '03/28/1996' AND SALES_PERSON = 'LEE' AND REGION =
"SOME-REGION'

----- GROUPING SETS:
SELECT YEAR(SALES_DATE) AS YEAR, REGION, SALES_PERSON, COUNT(*) AS TOT_SALES
FROM SALES
GROUP BY GROUPING SETS (YEAR(SALES_DATE), REGION, SALES_PERSON, ( ))
ORDER BY YEAR(SALES_DATE), REGION, SALES_PERSON


 

MORE SQL: EXAMPLES, WITH, HEXADECIMAL, OPTIMIZER

EXAMPLES,

SOME SQL EXAMPLES:

SELECT COUNT(*)  FROM TAB1.S_EMPLOYEE A 
INNER JOIN INST1.S_EMP_POSTN B ON A.ROW_ID = B.EMP_ID 
WHERE NOT LOGIN LIKE 'NO-LOGIN%'

 
SELECT COUNT(*)  FROM INST1.S_EMPLOYEE A
INNER JOIN INST1.S_EMP_POSTN B ON A.ROW_ID = B.EMP_ID 
INNER JOIN INST1.S_POSTN C ON B.POSITION_ID = C.ROW_ID
WHERE 
C.CREATED > '2002-03-12-00.00.01.000000' 
AND C.CREATED < '2002-03-17-23.59.59.999999'
AND NOT A.LOGIN LIKE 'NO-LOGIN%' 

 
SELECT DISTINCT A.LOGIN KERBEROS,
E.NAME POSITION ,
VALUE(C.YES,'NO') WITH_COVERAGE 
FROM INST1.S_EMPLOYEE A
INNER JOIN INST1.S_EMP_POSTN B ON A.ROW_ID = B.EMP_ID
INNER JOIN INST1.S_POSTN E ON B.POSITION_ID = E.ROW_ID 
LEFT OUTER JOIN (SELECT 'YES' YES, POSITION_ID FROM INST1.S_ACCNT_POSTN) C ON E.ROW_ID = C.POSITION_ID 
WHERE NOT A.LOGIN LIKE 'NO-LOGIN%'

 
    SELECT
       VALUE(Z.KERBEROS,Y.KERBEROS) MY_KERBEROS,
       VALUE(Z.FIRST_NAME,Y.FIRST_NAME) MY_FIRST_NAME, 
       VALUE(Z.LAST_NAME,Y.LAST_NAME) MY_LAST_NAME, 
       VALUE(Z.REP_ID,Y.REP_ID) MY_REP_ID, 
       VALUE(Z.DESK_NAME,Y.DESK_NAME) MY_DESK_NAME
     FROM
     
        SELECT
          A.LOGIN KERBEROS, 
          A.FST_NAME FIRST_NAME, 
          A.LAST_NAME LAST_NAME, 
          A.PAGER_PIN REP_ID, 
          C.X_POSITION_NAME DESK_NAME 
        FROM
          INST1.S_EMPLOYEE A, INST1.S_POSTN B, INST1.S_EMP_POSTN D, INST1.S_POSTN C 
        WHERE
          A.ROW_ID = D.EMP_ID  AND D.POSITION_ID = B.ROW_ID AND B.PAR_POSTN_ID = C.ROW_ID  AND  A.PAGER_PIN IS NOT NULL 
      ) Y
      LEFT OUTER JOIN

     
        SELECT
          A.LOGIN KERBEROS, 
          A.FST_NAME FIRST_NAME, 
         A.LAST_NAME LAST_NAME,
          A.PAGER_PIN REP_ID, 
          C.X_POSITION_NAME DESK_NAME
        FROM
          INST1.S_EMPLOYEE A, INST1.S_POSTN B, INST1.S_POSTN C 
        WHERE
          A.PR_POSTN_ID = B.ROW_ID AND B.PAR_POSTN_ID = C.ROW_ID AND A.PAGER_PIN IS NOT NULL 
      ) Z
     ON Y.KERBEROS = Z.KERBEROS
  WHERE VALUE(Z.KERBEROS,Y.KERBEROS) NOT IN ('SOMETHING1','SOMETHING2') 
  ORDER BY MY_REP_ID


 
SELECT COL1 FROM TAB1 GROUP BY COL1 HAVING COUNT(*) > 1

SELECT * FROM TAB1 WHERE LCASE(COL1) = 'O''BRIEN'

SOME (NON)-TRIVIAL EXAMPLES:

IN SYBASE WE FREQUENTLY USE TEMP. TABLES AND STORED PROCEDURE WHEN WE NEED TO GET A RESULT FROM MANY TABLES.
FOR EXAMPLE,

CREATE TEMP TABLE #TEMP1
INSERT #TEMP1 SELECT ... -- POPULATE SOME OF ITS COLUMNS WITH DATA
UPDATE #TEMP1 SET ...  -- POPULATE OTHER COLUMNS
UPDATE #TEMP1 SET ... -- AGAIN
UPDATE #TEMP1 SET ...  -- AGAIN
SELECT ... FROM #TEMP1 WHERE ...

IN DB2 THIS CAN BE ALL EXPRESSED AS ONE SQL STATEMENT LIKE THIS:

WITH QUERY1 ( COL1, COL2, COL3, ..., COLN) AS ( SELECT ... ),
       QUERY2 (COL, ....) AS (SELECT .. FROM QUERY1 LEFT OUTER JOIN ... ON ... ),
       QUERY3 (COL, ....) AS (SELECT .. FROM QUERY2 LEFT OUTER JOIN ... ON ... ),
       QUERY4 (COL, ....) AS (SELECT .. FROM QUERY3 LEFT OUTER JOIN ... ON ... )
SELECT  ..... FROM QUERY4 WHERE ...

---------------------------- ONE MORE QUERY USING THE 'WITH' CONSTRUCT

WITH
  MYKID (KID) AS (
    SELECT USER_ID FROM ... WHERE ...
    UNION
    SELECT KID FROM ... WHERE ...
  ),
  MYACCOUNTIDS (ACCOUNTID) AS (
    SELECT DISTINCT ACCOUNTID FROM MYKID T, ... WHERE ...
  ),
  MYSOMETHING (COL1, COL2, ...) AS (
    SELECT ... FROM ... WHERE ...
  )
SELECT ... FROM ... WHERE ...
 

---------------------------- RETURN 1 IF SOMETHING EXISTS, OTHERWISE RETURN 0

WITH T1 (IS_MGR) AS (
  SELECT 1 IS_MGR FROM TABLE (VALUES 1) AS T2
  WHERE EXISTS (SELECT 1 FROM ... WHERE ...)
  UNION
  SELECT 0 IS_MGR FROM TABLE (VALUES 0) AS T2
)
SELECT * FROM T1 ORDER BY IS_MGR DESC FETCH FIRST 1 ROWS ONLY

---------------------------- 

SELECT DISTINCT ... , CAST(NULL AS CHAR) AS FIRST_NAME, CAST(NULL AS CHAR) AS LAST_NAME
FROM TAB1, TABLE (SELECT ... FROM ... WHERE ... ) AS T
WHERE ... = CAST(SUBSTR(T.USER_ID,1,LOCATE('_', T.USER_ID) -1_ AS INT)

----------------------------

SELECT COL1 FROM TAB1 GROUP BY COL1 HAVING COUNT(*) > 1

SELECT * FROM TAB1 WHERE LCASE(COL1) = 'O''BRIEN'

---------------------------- 

GET TOP TEN:

SELECT NAME, SALARY FROM EMPLOYEE A
WHERE 10 > (SELECT COUNT(*) FROM EMPLOYEE B WHERE A.SALARY < B.SALARY)
ORDER BY SALARY DESC

SELECT NAME, SALARY FROM EMPLOYEE A ORDER BY SALARY DESC FETCH FIRST 10 ROWS ONLY
 

---------------------------- 

GET LIST OF PAIRS WITH EQUAL VALUES:

SELECT A.NAME, A.BIRTHDAY, B.NAME
FROM EMPLOYEE A, EMPLOYEE B
WHERE A.BIRTHDAY = B.BIRTHDAY
AND A.EMP_NO > B.EMP_NO

 

WORKING WITH HEXADECIMAL:
 

VALUES( HEX(1024) )
VALUES ( X'3233' )
VALUES ( CAST(X'3233' AS INTEGER) + 2 )
VALUES ( CAST(X'3233' AS CHAR(2)) || ' - TWENTY THREE' )
VALUES ( X'4672616E6B' )
-- FRANK
VALUES ( X'30' )
-- 0
VALUES ( X'30313233' )
-- 0123

-- NOW LET'S GET NAMES WITH A DIGIT 8 IN THEM:
SELECT NAME FROM SIEBEL.S_ORG_EXT 
WHERE NAME LIKE '%' || X'38' || '%'
FETCH FIRST 10 ROWS ONLY

OPTIMIZING QUERIES:

DB2 -TVF TEST.SQL
DB2 RUNSTATS ON TABLE MYTAB WITH DISTRIBUTION AND DETAILED INDEXES ALL SHRLEVEL CHANGE
 
VISUAL EXPLAIN A GRAPHICAL TOOL AVAILABLE IN THE DB2  COMMAND CENTER (OR CONTROL CENTER) - TO EXAMINE SINGLE QUERIES, WHETHER STATIC OR DYNAMIC. SHOWS A COLOR-CODED TREE. CLICKING ON A NODE ALLOWS YOU TO VIEW THE ARGUMENTS, STATISTICS, AND COST ESTIMATE OF THE NODE. YOU CAN ALSO JUMP DIRECTLY TO DB2'S DOCUMENTATION ON THAT NODE TYPE. VISUAL EXPLAIN CAN ALSO BE RUN FROM THE COMMAND LINE AS DB2VEXP.EXE, THOUGH IT THEN LACKS STATIC SQL ABILITY.
DB2EXPLN THE "BARE BONES" TOOL, GIVING TEXT OUTPUT FROM STATIC SQL PACKAGES ONLY. THIS CAN OPTIONALLY INCLUDE A CHARACTER MODE GRAPH.
DYNEXPLN GIVES A TEXT-MODE ANALYSIS OF A DYNAMIC SQL QUERY. IT ACTUALLY PACKAGES THE DYNAMIC QUERY AND CALLS DB2EXPLN TO DO THE WORK.  FROM UNIX COMMAND LINE INVOKE IT AS:
  DYNEXPLN -D MYDB -F TEST.SQL
  DYNEXPLN -H
DB2EXFMT  A FORMATTER FOR PREVIOUSLY STORED EXPLAIN DATA.