SELECT
member_casual,
AVG(CASE
-- Fix instances where start and end timestamps were reversed.
WHEN TIMESTAMP_DIFF(ended_at, started_at, MINUTE) BETWEEN -3890 AND 0
THEN ABS(TIMESTAMP_DIFF(ended_at, started_at, MINUTE))
-- Fix instances where end timestamps had the prior month instead of current month.
WHEN TIMESTAMP_DIFF(ended_at, started_at, MINUTE) < -3890
THEN TIMESTAMP_DIFF((TIMESTAMP_ADD(ended_at, INTERVAL 30 DAY)), started_at, MINUTE)
ELSE TIMESTAMP_DIFF(ended_at, started_at, MINUTE)
END) AS avg_ride_length_minutes
FROM (
--Combine rows from all 12 months. Remove duplicates via DISTINCT.
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202007`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202008_1`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202008_2`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202009`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202010`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202011`
--Correct mismatched data types for remaining months of data. Change station IDs from strings to integers.
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202012`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202101`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202102`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202103`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202104`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202105`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202106_1`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202106_2`
))
GROUP BY member_casual;
SELECT
rideable_type,
member_casual,
COUNT(*) AS number_of_rides
FROM (
--Combine rows from all 12 months. Remove duplicates via DISTINCT.
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202007`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202008_1`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202008_2`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202009`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202010`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202011`
--Correct mismatched data types for remaining months of data. Change station IDs from strings to integers.
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202012`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202101`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202102`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202103`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202104`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202105`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202106_1`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202106_2`
))
GROUP BY rideable_type, member_casual
ORDER BY rideable_type, member_casual;
SELECT
EXTRACT(HOUR FROM started_at) AS hour_of_day,
member_casual,
COUNT(*) AS number_of_rides,
AVG(CASE
-- Fix instances where start and end timestamps were reversed.
WHEN TIMESTAMP_DIFF(ended_at, started_at, MINUTE) BETWEEN -3890 AND 0
THEN ABS(TIMESTAMP_DIFF(ended_at, started_at, MINUTE))
-- Fix instances where end timestamps had the prior month instead of current month.
WHEN TIMESTAMP_DIFF(ended_at, started_at, MINUTE) < -3890
THEN TIMESTAMP_DIFF((TIMESTAMP_ADD(ended_at, INTERVAL 30 DAY)), started_at, MINUTE)
ELSE TIMESTAMP_DIFF(ended_at, started_at, MINUTE)
END) AS avg_ride_length_minutes
FROM (
--Combine rows from all 12 months. Remove duplicates via DISTINCT.
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202007`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202008_1`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202008_2`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202009`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202010`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202011`
--Correct mismatched data types for remaining months of data. Change station IDs from strings to integers.
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202012`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202101`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202102`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202103`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202104`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202105`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202106_1`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202106_2`
))
GROUP BY hour_of_day, member_casual
ORDER BY hour_of_day, member_casual;
SELECT
EXTRACT(DAYOFWEEK FROM started_at) AS day_of_week,
member_casual,
COUNT(*) AS number_of_rides,
AVG(CASE
-- Fix instances where start and end timestamps were reversed.
WHEN TIMESTAMP_DIFF(ended_at, started_at, MINUTE) BETWEEN -3890 AND 0
THEN ABS(TIMESTAMP_DIFF(ended_at, started_at, MINUTE))
-- Fix instances where end timestamps had the prior month instead of current month.
WHEN TIMESTAMP_DIFF(ended_at, started_at, MINUTE) < -3890
THEN TIMESTAMP_DIFF((TIMESTAMP_ADD(ended_at, INTERVAL 30 DAY)), started_at, MINUTE)
ELSE TIMESTAMP_DIFF(ended_at, started_at, MINUTE)
END) AS avg_ride_length_minutes
FROM (
--Combine rows from all 12 months. Remove duplicates via DISTINCT.
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202007`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202008_1`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202008_2`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202009`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202010`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202011`
--Correct mismatched data types for remaining months of data. Change station IDs from strings to integers.
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202012`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202101`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202102`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202103`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202104`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202105`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202106_1`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202106_2`
))
GROUP BY day_of_week, member_casual
ORDER BY day_of_week, member_casual;
SELECT
EXTRACT(MONTH FROM started_at) AS month,
member_casual,
COUNT(*) AS number_of_rides,
AVG(CASE
-- Fix instances where start and end timestamps were reversed.
WHEN TIMESTAMP_DIFF(ended_at, started_at, MINUTE) BETWEEN -3890 AND 0
THEN ABS(TIMESTAMP_DIFF(ended_at, started_at, MINUTE))
-- Fix instances where end timestamps had the prior month instead of current month.
WHEN TIMESTAMP_DIFF(ended_at, started_at, MINUTE) < -3890
THEN TIMESTAMP_DIFF((TIMESTAMP_ADD(ended_at, INTERVAL 30 DAY)), started_at, MINUTE)
ELSE TIMESTAMP_DIFF(ended_at, started_at, MINUTE)
END) AS avg_ride_length_minutes
FROM (
--Combine rows from all 12 months. Remove duplicates via DISTINCT.
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202007`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202008_1`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202008_2`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202009`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202010`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202011`
--Correct mismatched data types for remaining months of data. Change station IDs from strings to integers.
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202012`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202101`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202102`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202103`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202104`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202105`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202106_1`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202106_2`
))
GROUP BY month, member_casual
ORDER BY month, member_casual;
--most popular locations, casual riders
SELECT
concat(start_lat, ', ', start_lng) AS location,
COUNT(*) AS number_of_rides
FROM (
--Combine rows from all 12 months. Remove duplicates via DISTINCT.
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202007`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202008_1`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202008_2`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202009`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202010`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202011`
--Correct mismatched data types for remaining months of data. Change station IDs from strings to integers.
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202012`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202101`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202102`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202103`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202104`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202105`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202106_1`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202106_2`
))
WHERE member_casual LIKE 'casual'
GROUP BY location
ORDER BY COUNT(*) DESC
LIMIT 100;
--most popular locations, casual riders
SELECT
concat(start_lat, ', ', start_lng) AS location,
COUNT(*) AS number_of_rides
FROM (
--Combine rows from all 12 months. Remove duplicates via DISTINCT.
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202007`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202008_1`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202008_2`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202009`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202010`
UNION DISTINCT
SELECT DISTINCT *
FROM `cyclistic-322919.tripdata.202011`
--Correct mismatched data types for remaining months of data. Change station IDs from strings to integers.
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202012`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202101`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202102`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202103`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202104`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202105`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202106_1`
)
UNION DISTINCT
SELECT DISTINCT *
FROM (
SELECT
ride_id, rideable_type, started_at, ended_at, start_station_name,
SAFE_CAST(start_station_id AS INT64), end_station_name,
SAFE_CAST(end_station_id AS INT64), start_lat, start_lng, end_lat, end_lng, member_casual
FROM `cyclistic-322919.tripdata.202106_2`
))
WHERE member_casual LIKE 'member'
GROUP BY location
ORDER BY COUNT(*) DESC
LIMIT 100;