Post date: Mar 30, 2014 1:58:28 PM
"Find the sum of all the primes below two million."
Python provides a much faster solution (10s) than SQL, but in SQL it's a good query optimization experiment. I've tried a few different ways, and this was the best, at under 50 seconds. Surprisingly, EXCEPT performs much better than EXISTS. In this case, stopping at the first bad result used a LOT more time than a union of thousands of numbers. I was not expecting that at all, and shows why it's important to experiment.
CREATE TABLE #Integers (i int not null primary key clustered)
INSERT INTO #Integers (i) VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)
-- Fill a table with numbers between 1 and 2 million, because pulling from a physical table is faster than calculating on the fly
; WITH Integers(i) AS
(
SELECT o.i +
(10 * d.i) +
(100 * h.i) +
(1000 * k.i) +
(10000 * k10.i) +
(100000 * k100.i) +
(1000000 * m.i)
FROM #Integers o
CROSS JOIN #Integers d
CROSS JOIN #Integers h
CROSS JOIN #Integers k
CROSS JOIN #Integers k10
CROSS JOIN #Integers k100
CROSS JOIN #Integers m
WHERE m.i < 2
)
INSERT INTO #Integers (i)
SELECT i
FROM Integers
WHERE i >= 10
-- It isn't necessary to check against every number, so fill a smaller table that only allows the possible divisors.
CREATE TABLE #Divisors (i int not null primary key clustered)
INSERT INTO #Divisors
SELECT i
FROM #Integers
WHERE i % 2 != 0
AND i > 2
AND i < SQRT(2000000) + 1
;WITH NotPrimes(i) AS
(
-- Numbers divisible by 2 (other than 2) aren't prime)
SELECT i.i
FROM #Integers i
WHERE i.i > 2
AND i % 2 = 0
UNION
-- Numbers divisible by any number aren't prime.
SELECT i.i
FROM #Integers i
CROSS APPLY
(
SELECT 1 AS notprime
FROM #Divisors d
WHERE i.i % d.i = 0
AND d.i <= SQRT(i.i) + 1
) notprime
WHERE i.i > 2
),
Primes (i) AS
(
-- A prime is any number greater than 1 that is not non-prime.
SELECT i
FROM #Integers
WHERE i >= 2
EXCEPT
SELECT i
FROM NotPrimes
WHERE i > 3
)
SELECT SUM(CONVERT(bigint,i))
FROM Primes
DROP TABLE #Integers
DROP TABLE #Divisors