可以将文章内容翻译成中文,广告屏蔽插件可能会导致该功能失效(如失效,请关闭广告屏蔽插件后再试):
问题:
Can you help me with SQL statements to find duplicates on multiple fields?
For example, in pseudo code:
select count(field1,field2,field3)
from table
where the combination of field1, field2, field3 occurs multiple times
and from the above statement if there are multiple occurrences I would like to select every record except the first one.
回答1:
To get the list of fields for which there are multiple records, you can use..
select field1,field2,field3, count(*)
from table_name
group by field1,field2,field3
having count(*) > 1
Check this link for more information on how to delete the rows.
http://support.microsoft.com/kb/139444
Edit : As the other users mentioned, there should be a criterion for deciding how you define \"first rows\" before you use the approach in the link above. Based on that you\'ll need to use an order by clause and a sub query if needed. If you can post some sample data, it would really help.
回答2:
You mention \"the first one\", so I assume that you have some kind of ordering on your data. Let\'s assume that your data is ordered by some field ID
.
This SQL should get you the duplicate entries except for the first one. It basically selects all rows for which another row with (a) the same fields and (b) a lower ID exists. Performance won\'t be great, but it might solve your problem.
SELECT A.ID, A.field1, A.field2, A.field3
FROM myTable A
WHERE EXISTS (SELECT B.ID
FROM myTable B
WHERE B.field1 = A.field1
AND B.field2 = A.field2
AND B.field3 = A.field3
AND B.ID < A.ID)
回答3:
This is a fun solution with SQL Server 2005 that I like. I\'m going to assume that by \"for every record except for the first one\", you mean that there is another \"id\" column that we can use to identify which row is \"first\".
SELECT id
, field1
, field2
, field3
FROM
(
SELECT id
, field1
, field2
, field3
, RANK() OVER (PARTITION BY field1, field2, field3 ORDER BY id ASC) AS [rank]
FROM table_name
) a
WHERE [rank] > 1
回答4:
To see duplicate value
with MYCTE as (
select row_number() over ( partition by name order by name) rown, * from tmptest
)
select * from MYCTE where rown <=1
回答5:
If you\'re using SQL Server 2005 or later (and the tags for your question indicate SQL Server 2008), you can use ranking functions to return the duplicate records after the first one if using joins is less desirable or impractical for some reason. The following example shows this in action, where it also works with null values in the columns examined.
create table Table1 (
Field1 int,
Field2 int,
Field3 int,
Field4 int
)
insert Table1
values (1,1,1,1)
, (1,1,1,2)
, (1,1,1,3)
, (2,2,2,1)
, (3,3,3,1)
, (3,3,3,2)
, (null, null, 2, 1)
, (null, null, 2, 3)
select *
from (select Field1
, Field2
, Field3
, Field4
, row_number() over (partition by Field1
, Field2
, Field3
order by Field4) as occurrence
from Table1) x
where occurrence > 1
Notice after running this example that the first record out of every \"group\" is excluded, and that records with null values are handled properly.
If you don\'t have a column available to order the records within a group, you can use the partition-by columns as the order-by columns.
回答6:
CREATE TABLE #tmp
(
sizeId Varchar(MAX)
)
INSERT #tmp
VALUES (\'44\'),
(\'44,45,46\'),
(\'44,45,46\'),
(\'44,45,46\'),
(\'44,45,46\'),
(\'44,45,46\'),
(\'44,45,46\')
SELECT * FROM #tmp
DECLARE @SqlStr VARCHAR(MAX)
SELECT @SqlStr = STUFF((SELECT \',\' + sizeId
FROM #tmp
ORDER BY sizeId
FOR XML PATH(\'\')), 1, 1, \'\')
SELECT TOP 1 * FROM (
select items, count(*)AS Occurrence
FROM dbo.Split(@SqlStr,\',\')
group by items
having count(*) > 1
)K
ORDER BY K.Occurrence DESC