可以将文章内容翻译成中文,广告屏蔽插件可能会导致该功能失效(如失效,请关闭广告屏蔽插件后再试):
问题:
I'm struggling with a tricky SQL query that I'm trying to write. Have a look at the following table:
+---+---+
| A | B |
+---+---+
| 1 | 2 |
| 1 | 3 |
| 2 | 2 |
| 2 | 3 |
| 2 | 4 |
| 3 | 2 |
| 3 | 3 |
| 4 | 2 |
| 4 | 3 |
| 4 | 4 |
+---+---+
Now, from this table, I essentially want a list of all As which have the exact same set of Bs and give each set an incrementing ID.
Hence, the output set for the above would be:
+---+----+
| A | ID |
+---+----+
| 1 | 1 |
| 3 | 1 |
| 2 | 2 |
| 4 | 2 |
+---+----+
Thanks.
Edit: If it helps, I have a list of all distinct values of B that are possible in another table.
Edit: Thank you so much for all the innovative answers. Was able to learn a lot indeed.
回答1:
Here is mathematical trick to solve your tricky select:
with pow as(select *, b * power(10, row_number()
over(partition by a order by b)) as rn from t)
select a, dense_rank() over( order by sum(rn)) as rn
from pow
group by a
order by rn, a
Fiddle http://sqlfiddle.com/#!3/6b98d/11
This of course will work only for limited distinct count as you will get overflow. Here is more general solution with strings:
select a,
dense_rank() over(order by (select '.' + cast(b as varchar(max))
from t t2 where t1.a = t2.a
order by b
for xml path(''))) rn
from t t1
group by a
order by rn, a
Fiddle http://sqlfiddle.com/#!3/6b98d/29
回答2:
Something like this:
select a, dense_rank() over (order by g) as id_b
from (
select a,
(select b from MyTable s where s.a=a.a order by b FOR XML PATH('')) g
from MyTable a
group by a
) a
order by id_b,a
Or maybe using a CTE (I avoid them when possible)
Sql Fiddle
As a side note, this is the output of the inner query using the sample data in the question:
a g
1 <b>2</b><b>3</b>
2 <b>2</b><b>3</b><b>4</b>
3 <b>2</b><b>3</b>
4 <b>2</b><b>3</b><b>4</b>
回答3:
Here's a long winded approach, by finding sets with the same elements (using EXCEPT
bidirectionally to eliminate, and just done a half diagonal cartesian product), then pairing equal sets up, stamping each pair with a ROW_NUMBER()
, before unpivoting the pairs of A's
into to your final output where the equivalent sets are projected as rows which have the same id
.
WITH joinedSets AS
(
SELECT t1.A as t1A, t2.A AS t2A
FROM MyTable t1
INNER JOIN MyTable t2
ON t1.B = t2.B
AND t1.A < t2.A
),
equalSets AS
(
SELECT js.t1A, js.t2A, ROW_NUMBER() OVER (ORDER BY js.t1A) AS Id
FROM joinedSets js
GROUP BY js.t1A, js.t2A
HAVING NOT EXISTS ((SELECT mt.B FROM MyTable mt WHERE mt.A = js.t1A)
EXCEPT (SELECT mt.B FROM MyTable mt WHERE mt.A = js.t2A))
AND NOT EXISTS ((SELECT mt.B FROM MyTable mt WHERE mt.A = js.t2A)
EXCEPT (SELECT mt.B FROM MyTable mt WHERE mt.A = js.t1A))
)
SELECT A, Id
FROM equalSets
UNPIVOT
(
A
FOR ACol in (t1A, t2A)
) unp;
SqlFiddle here
As it stands, this solution will only work with pairs of sets, not triples etc. A general NTuple
type solution is probably possible (but beyond my brain right now).
回答4:
Here is a very simple, fast, but approximate solution.
It is possible that CHECKSUM_AGG
returns the same checksum for different sets of B.
DECLARE @T TABLE (A int, B int);
INSERT INTO @T VALUES
(1, 2),(1, 3),(2, 2),(2, 3),(2, 4),(3, 2),(3, 3),(4, 2),(4, 3),(4, 4);
SELECT
A
,CHECKSUM_AGG(B) AS CheckSumB
,ROW_NUMBER() OVER (PARTITION BY CHECKSUM_AGG(B) ORDER BY A) AS GroupNumber
FROM @T
GROUP BY A
ORDER BY A, GroupNumber;
Result set
A CheckSumB GroupNumber
-----------------------------
1 1 1
2 5 1
3 1 2
4 5 2
For exact solution group by A
and concatenate all B
values into a long (binary) string using either FOR XML, CLR, or T-SQL function. Then you can partition ROW_NUMBER
by that concatenated string to assign numbers to the groups. As shown in other answers.
回答5:
EDIT
I am changing the code, but it will get bigger now, took help from
Concatenate many rows into a single text string? for concatinating strings
Select [A],
Left(M.[C],Len(M.[C])-1) As [D] into #tempSomeTable
From
(
Select distinct T2.[A],
(
Select Cast(T1.[B] as VARCHAR) + ',' AS [text()]
From sometable T1
Where T1.[A] = T2.[A]
ORDER BY T1.[A]
For XML PATH ('')
) [C]
From sometable T2
)M
SELECT t.A, DENSE_RANK() OVER(ORDER BY t.[D]) [ID] FROM
#tempSomeTable t
inner join
(SELECT [D] FROM(
SELECT [D], COUNT([A]) [D_A] from
#tempSomeTable t
GROUP BY [D] )P where [C_A]>1)t1 on t1.[D]=t.[D]
回答6:
Here is an exact, rather than approximate, solution. It uses nothing more advanced than INNER JOIN and GROUP BY (and, of course, the DENSE_RANK() to get the ID you want).
It is also general, in that it allows for B values to be repeated within an A group.
SELECT A,
DENSE_RANK() OVER (ORDER BY MIN_EQUIVALENT_A) AS ID
FROM (
SELECT MATCHES.A1 AS A,
MIN(MATCHES.A2) AS MIN_EQUIVALENT_A
FROM (
SELECT T1.A AS A1,
T2.A AS A2,
COUNT(*) AS NUM_B_VALS_MATCHED
FROM (
SELECT A,
B,
COUNT(*) AS B_VAL_FREQ
FROM MyTable
GROUP BY A,
B
) AS T1
INNER JOIN
(
SELECT A,
B,
COUNT(*) AS B_VAL_FREQ
FROM MyTable
GROUP BY A,
B
) AS T2
ON T1.B = T2.B
AND T1.B_VAL_FREQ = T2.B_VAL_FREQ
GROUP BY T1.A,
T2.A
) AS MATCHES
INNER JOIN
(
SELECT A,
COUNT(DISTINCT B) AS NUM_B_VALS_TOTAL
FROM MyTable
GROUP BY A
) AS CHECK_TOTALS_A1
ON MATCHES.A1 = CHECK_TOTALS_A1.A
AND MATCHES.NUM_B_VALS_MATCHED
= CHECK_TOTALS_A1.NUM_B_VALS_TOTAL
INNER JOIN
(
SELECT A,
COUNT(DISTINCT B) AS NUM_B_VALS_TOTAL
FROM MyTable
GROUP BY A
) AS CHECK_TOTALS_A2
ON MATCHES.A2 = CHECK_TOTALS_A2.A
AND MATCHES.NUM_B_VALS_MATCHED
= CHECK_TOTALS_A2.NUM_B_VALS_TOTAL
GROUP BY MATCHES.A1
) AS EQUIVALENCE_TABLE
ORDER BY 2,1
;