DECLARE @c int = 1000;
DECLARE @numbers TABLE (n int NOT NULL PRIMARY KEY);
DECLARE @products TABLE (p int NOT NULL PRIMARY KEY);
DECLARE @primes TABLE (p int NOT NULL PRIMARY KEY);
-- The 'composite exclusion' approach
-- 1. list all n = 2, 3, 4, ... c
WITH numbers AS
(
SELECT 2 AS n
UNION ALL
SELECT n + 1 FROM numbers
WHERE n <= @c - 1
)
INSERT INTO @numbers SELECT n FROM numbers OPTION(MAXRECURSION 0);
-- 2. find all products n x n <= c
WITH products AS
(
SELECT DISTINCT m.n * n.n AS p
FROM @numbers m LEFT OUTER JOIN
@numbers n ON 1 = 1
WHERE m.n * n.n <= @c
)
INSERT INTO @products SELECT p FROM products;
-- 3. numbers with no matching products are not composite, i.e, they're prime numbers.
INSERT INTO @primes
SELECT n.n FROM @numbers n LEFT JOIN @products p ON n.n = p.p WHERE p.p IS NULL;
It's kind of a one pass Sieve of Eratosthenes approach.
I've seen loops, stored procedures and the like, as well as pseudo-code and other language implementations, but it seems to me that this simple, set-based approach stemming from the definition of prime numbers should suffice.
Please bear in mind I'm not concerned with performance or memory consumption or optimizations at this time, and I have not tested it with larger numbers. I just want to publish the algorithm and have people confirm (or challenge) that excluding composite numbers from the list is enough.
Recursive CTEs (rCTE) are very rarely the best performing solution. Below is an approach that uses a tally table, it's a slightly tweaked version of the approach that Hugo Kornelis posted here: https://sqlserverfast.com/blog/hugo/2006/09/the-prime-number-challenge-great-waste-of-time/
Let's compare the tally table solution to the rCTE solution:
SET STATISTICS TIME ON;
PRINT 'tally table approach'+char(13)+char(10)+replicate('-',50);
DECLARE @primes TABLE (p int NOT NULL PRIMARY KEY);
DECLARE @limit bigint = 10000;
WITH E(x) AS (SELECT * FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t(x)),
iTally(N) AS (SELECT TOP(@limit) ROW_NUMBER() OVER (ORDER BY (SELECT 1)) FROM E a, E b, E c, E d, E f)
INSERT @primes
SELECT n1.N
FROM itally AS n1
WHERE n1.N > 1
AND n1.N < @Limit
AND NOT EXISTS
(SELECT *
FROM itally AS n2
WHERE n2.N < @limit
AND n2.N BETWEEN 2 AND n1.N-1
AND n1.n % n2.N = 0)
--ORDER BY N
GO
PRINT 'rCTE approach'+char(13)+char(10)+replicate('-',50);
DECLARE @c int = 10000;
DECLARE @numbers TABLE (n int NOT NULL PRIMARY KEY);
DECLARE @products TABLE (p int NOT NULL PRIMARY KEY);
DECLARE @primes TABLE (p int NOT NULL PRIMARY KEY);
WITH numbers AS
(
SELECT 2 AS n
UNION ALL
SELECT n + 1 FROM numbers
WHERE n <= @c - 1
)
INSERT INTO @numbers SELECT n FROM numbers OPTION(MAXRECURSION 0);
-- 2. find all products n x n <= c
WITH products AS
(
SELECT DISTINCT m.n * n.n AS p
FROM @numbers m LEFT OUTER JOIN
@numbers n ON 1 = 1
WHERE m.n * n.n <= @c
)
INSERT INTO @products SELECT p FROM products;
-- 3. numbers with no matching products are not composite, i.e, they're prime numbers.
INSERT INTO @primes
SELECT n.n FROM @numbers n LEFT JOIN @products p ON n.n = p.p WHERE p.p IS NULL;
SET STATISTICS TIME OFF;
and the results:
tally table approach
--------------------------------------------------
SQL Server Execution Times:
CPU time = 3042 ms, elapsed time = 3241 ms.
SQL Server parse and compile time:
CPU time = 0 ms, elapsed time = 10 ms.
rCTE approach
--------------------------------------------------
SQL Server Execution Times:
CPU time = 14976 ms, elapsed time = 15757 ms.
As you can see, the tally table approach against 10,000 was 5 times faster and also doesn't produce any reads (the rCTE produces a ton!)
If you are really working with prime numbers the absolute fastest approach would be to store them in a table so you don't need to calculate them each time you need prime numbers.