I need to remove semi-duplicate records from the following table:
ID PID SCORE
1 1 50
2 33 20
3 1 90
4 5 55
5 7 11
6 22 34
For any PID that appears more than once, I want to remove the lowest-scoring record. In the example above, the row with ID 1 would be removed. I'm trying to come up with a way of doing this without using loops, but I am really struggling.
Any help would be appreciated.
Thanks
-- For every pid that occurs more than once, delete exactly one row: the one
-- with the lowest score (ties on score are broken by the lowest id).
-- pids that occur only once are left untouched.
-- Assumes id uniquely identifies a row, as in the sample data.
DELETE t
FROM Table1 AS t
INNER JOIN (
    -- One victim id per duplicated pid.
    SELECT low.pid, MIN(low.id) AS victim_id
    FROM Table1 AS low
    INNER JOIN (
        -- Lowest score per pid, restricted to pids that really are duplicated.
        -- Counting rows (instead of comparing against MAX(id)) keeps the delete
        -- correct when the lowest-scoring row also has the highest id.
        SELECT pid, MIN(score) AS min_score
        FROM Table1
        GROUP BY pid
        HAVING COUNT(*) > 1
    ) AS dup
        ON low.pid = dup.pid
       AND low.score = dup.min_score
    GROUP BY low.pid
) AS victim
    ON t.id = victim.victim_id;
-- Deletes the lowest-scoring row of EVERY pid, including pids that occur only
-- once; add a per-pid row-count filter if singletons must be kept.
-- Deleting through the CTE removes the underlying mytable rows (SQL Server).
WITH ranked AS
(
    SELECT
        id,
        pid,
        score,
        -- id breaks ties so the same row is chosen on every run
        -- (assumes mytable has the unique id column shown in the question).
        ROW_NUMBER() OVER (PARTITION BY pid ORDER BY score, id) AS rn
    FROM mytable
)
DELETE
FROM ranked
WHERE rn = 1;
To keep rows whose pid has no duplicates, also require that the pid occurs more than once:
-- Deletes the single lowest-scoring row of each pid that occurs more than
-- once; pids with only one row are left alone.
-- Deleting through the CTE removes the underlying mytable rows (SQL Server).
WITH ranked AS
(
    SELECT
        id,
        pid,
        score,
        -- id breaks ties so the same row is chosen on every run
        -- (assumes mytable has the unique id column shown in the question).
        ROW_NUMBER() OVER (PARTITION BY pid ORDER BY score, id) AS rn,
        -- Number of rows sharing this pid; 1 means the pid is not duplicated.
        COUNT(*) OVER (PARTITION BY pid) AS cnt
    FROM mytable
)
DELETE
FROM ranked
WHERE rn = 1
  AND cnt > 1;
Try this:
-- Self-contained demo on a table variable.
-- Sample data: pids 1, 33, 5 and 6 each occur twice.
DECLARE @tt TABLE (id INT, pid INT, score INT);

INSERT INTO @tt (id, pid, score)
SELECT 1, 1, 50 UNION ALL
SELECT 2, 33, 50 UNION ALL
SELECT 8, 33, 80 UNION ALL
SELECT 3, 1, 90 UNION ALL
SELECT 4, 5, 50 UNION ALL
SELECT 5, 5, 10 UNION ALL
SELECT 6, 6, 10 UNION ALL
SELECT 7, 6, 50;

-- For each pid that occurs more than once, delete exactly one row: the one
-- with the lowest score. MIN(id) picks a single victim when several rows tie
-- on the lowest score, so a pid can never lose all of its rows.
DELETE FROM @tt
WHERE id IN (
    SELECT MIN(t1.id)
    FROM @tt AS t1
    INNER JOIN (
        -- Lowest score per duplicated pid; HAVING keeps single-row pids out.
        SELECT pid AS dup_pid, MIN(score) AS min_score
        FROM @tt
        GROUP BY pid
        HAVING COUNT(*) > 1
    ) AS lowest
        ON t1.pid = lowest.dup_pid
       AND t1.score = lowest.min_score
    GROUP BY t1.pid
);

-- With the sample data above, the remaining ids are 3, 4, 7 and 8.
SELECT id, pid, score
FROM @tt
ORDER BY id;
I can't see your query, so I've made up this example, which returns the highest score for each PID:
-- Highest score recorded for each PID (one output row per PID).
-- This only reports the score to keep; it does not delete anything.
SELECT PID, MAX(Score)
FROM tblTable
GROUP BY PID;