EXPLORATORY ANALYSIS - MYSQL
EXPLORATORY ANALYSIS - MYSQL
--- inspecting the data
SELECT * FROM project1.`sql project 1`;
--- Checking unique values
select distinct status from project1.`sql project 1`;
select distinct year_id from project1.`sql project 1`;
select distinct PRODUCTLINE from project1.`sql project 1`;
select distinct COUNTRY from project1.`sql project 1`;
select distinct DEALSIZE from project1.`sql project 1`;
select distinct TERRITORY from project1.`sql project 1`;
select distinct MONTH_ID from project1.`sql project 1`
where year_id = 2003
--- Analysis
--- grouping sales by PRODUCTLINE
select PRODUCTLINE, sum(sales) as Revenue
from project1.`sql project 1`
group by PRODUCTLINE
order by 2 desc;
-----grouping by YEAR_ID
select YEAR_ID, sum(sales) Revenue
from project1.`sql project 1`
group by YEAR_ID
order by 2 desc;
--- why does 2005 have the least sales? let's see if they operated for the full year
select distinct MONTH_ID
from project1.`sql project 1`
where YEAR_ID = 2005;
--- checking to see if the business ran through the year for 2003 & 2004
select distinct MONTH_ID
from project1.`sql project 1`
where YEAR_ID = 2003;
-----grouping by DEALSIZE
select DEALSIZE, sum(sales) Revenue
from project1.`sql project 1`
group by DEALSIZE
order by 2 desc;
--- What was the best month for sales in a specific year? How much was earned that month?
select MONTH_ID, sum(sales) Revenue, count(ORDERNUMBER) Frequency
from project1.`sql project 1`
where YEAR_ID = 2004
group by MONTH_ID
order by 2 desc;
select MONTH_ID, sum(sales) Revenue, count(ORDERNUMBER) Frequency
from project1.`sql project 1`
where YEAR_ID = 2005
group by MONTH_ID
order by 2 desc;
select MONTH_ID, sum(sales) Revenue, count(ORDERNUMBER) Frequency
from project1.`sql project 1`
where YEAR_ID = 2003
group by MONTH_ID
order by 2 desc;
--- November seems to be the month for 2004, what product do they sell in November?
select MONTH_ID, PRODUCTLINE, sum(sales) Revenue, count(ORDERNUMBER)
from project1.`sql project 1`
where YEAR_ID = 2004 and MONTH_ID = 11
group by MONTH_ID, PRODUCTLINE
order by 3 desc;
--- Who is our best customer (this could be best answered with RFM)
SELECT
CUSTOMERNAME,
max(ORDERDATE) AS R,
COUNT(DISTINCT ORDERNUMBER) AS F,
SUM(SALES) AS M
FROM project1.`sql project 1`
GROUP BY CUSTOMERNAME
ORDER BY R DESC, F DESC, M DESC;
SELECT
DATE(orderdate) 'Date/Time'
CURDATE(),
DATEDIFF(orde, CURDATE()) AS 'Date'
--- EXTRAs
--- What city has the highest number of sales in a specific country
select city, sum(sales) Revenue
from project1.`sql project 1`
where country = 'UK'
group by city
order by 2 desc;
-- What is the best product in United States?
select country, YEAR_ID, PRODUCTLINE, sum(sales) Revenue
from project1.`sql project 1`
where country = 'USA'
group by country, YEAR_ID, PRODUCTLINE
order by 4 desc;