sql数据库删除重复记录方法示例__name_select_val_from_where_

当前位置: 数据库>sqlserver

sql数据库删除重复记录方法示例

来源: 互联网发布时间：2014-08-29

本文导语: 在sql数据库中，如果数据结构设计不够严谨，会产生越来越多的重复数据，就不得不寻找sql server 查询重复记录的多种方法，找到重复数据以后，接下来就是使用distinct筛选重复记录，继而完成重复数据的删除。这里推荐一条...

在sql数据库中，如果数据结构设计不够严谨，会产生越来越多的重复数据，就不得不寻找sql server 查询重复记录的多种方法，找到重复数据以后，接下来就是使用distinct筛选重复记录，继而完成重复数据的删除。

这里推荐一条sql语句删除表中重复记录的方法，大多数时候可以解决我们遇到的问题。

如果遇到复杂些的重复记录处理方法，可以参考下面介绍的一些sql语句实例。

分享四种删除重复记录的方式，用于不同的情况。

1、查找表中多余的重复记录，重复记录是根据单个字段（peopleId）来判断

代码示例:

select * from people  where peopleId in (select   peopleId from   people group by   peopleId having count(peopleId) > 1)

2、sql删除重复记录，重复记录是根据单个字段（peopleid）来判断，只留有rowid最小的记录

代码示例:

delete from people  where peopleId in (select   peopleId from people group by   peopleId   having count(peopleId) > 1)  and rowid not in (select min(rowid) from   people group by peopleId having count(peopleId )>1)    

3、查找表中多余的重复记录（多个字段）

代码示例:

select * from vitae a  where (a.peopleId,a.seq) in   (select peopleId,seq from vitae group by peopleId,seq having count(*) > 1)  

4、删除表中多余的重复记录（多个字段），只留有rowid最小的记录

代码示例:

delete from vitae a  where (a.peopleId,a.seq) in   (select peopleId,seq from vitae group by peopleId,seq having count(*) > 1)  and rowid not in (select min(rowid) from vitae group by peopleId,seq having count(*)>1)   

附，去除重复记录sql语句。

对重复记录的处理，对搜索结果影响最大。
如何去除数据库中的重复记录呢？（sql语句大全）

--按某一字段分组取最大(小)值所在行的数据
/*
数据如下：
name val memo
a    2   a2(a的第二个值)
a    1   a1--a的第一个值
a    3   a3:a的第三个值
b    1   b1--b的第一个值
b    3   b3:b的第三个值
b    2   b2b2b2b2
b    4   b4b4
b    5   b5b5b5b5b5
*/
--创建表并插入数据：
create table tb(name varchar(10),val int,memo varchar(20))
insert into tb values('a',    2,   'a2(a的第二个值)')
insert into tb values('a',    1,   'a1--a的第一个值')
insert into tb values('a',    3,   'a3:a的第三个值')
insert into tb values('b',    1,   'b1--b的第一个值')
insert into tb values('b',    3,   'b3:b的第三个值')
insert into tb values('b',    2,   'b2b2b2b2')
insert into tb values('b',    4,   'b4b4')
insert into tb values('b',    5,   'b5b5b5b5b5')
go

--一、按name分组取val最大的值所在行的数据。
--方法1：
select a.* from tb a where val = (select max(val) from tb where name = a.name) order by a.name
--方法2：
select a.* from tb a where not exists(select 1 from tb where name = a.name and val > a.val)
--方法3：
select a.* from tb a,(select name,max(val) val from tb group by name) b where a.name = b.name and a.val = b.val order by a.name
--方法4：
select a.* from tb a inner join (select name , max(val) val from tb group by name) b on a.name = b.name and a.val = b.val order by a.name
--方法5
select a.* from tb a where 1 > (select count(*) from tb where name = a.name and val > a.val ) order by a.name
/*
name       val         memo
---------- ----------- --------------------
a          3           a3:a的第三个值
b          5           b5b5b5b5b5
*/

--二、按name分组取val最小的值所在行的数据。
--方法1：
select a.* from tb a where val = (select min(val) from tb where name = a.name) order by a.name
--方法2：
select a.* from tb a where not exists(select 1 from tb where name = a.name and val < a.val)
--方法3：
select a.* from tb a,(select name,min(val) val from tb group by name) b where a.name = b.name and a.val = b.val order by a.name
--方法4：
select a.* from tb a inner join (select name , min(val) val from tb group by name) b on a.name = b.name and a.val = b.val order by a.name
--方法5
select a.* from tb a where 1 > (select count(*) from tb where name = a.name and val < a.val) order by a.name
/*
name       val         memo
---------- ----------- --------------------
a          1           a1--a的第一个值
b          1           b1--b的第一个值
*/

--三、按name分组取第一次出现的行所在的数据。
select a.* from tb a where val = (select top 1 val from tb where name = a.name) order by a.name
/*
name       val         memo
---------- ----------- --------------------
a          2           a2(a的第二个值)
b          1           b1--b的第一个值
*/

--四、按name分组随机取一条数据。
select a.* from tb a where val = (select top 1 val from tb where name = a.name order by newid()) order by a.name
/*
name       val         memo
---------- ----------- --------------------
a          1           a1--a的第一个值
b          5           b5b5b5b5b5
*/

--五、按name分组取最小的两个(N个)val
select a.* from tb a where 2 > (select count(*) from tb where name = a.name and val < a.val ) order by a.name,a.val
select a.* from tb a where val in (select top 2 val from tb where name=a.name order by val) order by a.name,a.val
select a.* from tb a where exists (select count(*) from tb where name = a.name and val < a.val having Count(*) < 2) order by a.name,a.val
/*
name       val         memo
---------- ----------- --------------------
a          1           a1--a的第一个值
a          2           a2(a的第二个值)
b          1           b1--b的第一个值
b          2           b2b2b2b2
*/

--六、按name分组取最大的两个(N个)val
select a.* from tb a where 2 > (select count(*) from tb where name = a.name and val > a.val ) order by a.name,a.val
select a.* from tb a where val in (select top 2 val from tb where name=a.name order by val desc) order by a.name,a.val
select a.* from tb a where exists (select count(*) from tb where name = a.name and val > a.val having Count(*) < 2) order by a.name , a.val
/*
name       val         memo
---------- ----------- --------------------
a          2           a2(a的第二个值)
a          3           a3:a的第三个值
b          4           b4b4
b          5           b5b5b5b5b5
*/
--七，如果整行数据有重复，所有的列都相同。
/*
数据如下：
name val memo
a    2   a2(a的第二个值)
a    1   a1--a的第一个值
a    1   a1--a的第一个值
a    3   a3:a的第三个值
a    3   a3:a的第三个值
b    1   b1--b的第一个值
b    3   b3:b的第三个值
b    2   b2b2b2b2
b    4   b4b4
b    5   b5b5b5b5b5
*/
--在sql server 2000中只能用一个临时表来解决，生成一个自增列，先对val取最大或最小，然后再通过自增列来取数据。
--创建表并插入数据：
create table tb(name varchar(10),val int,memo varchar(20))
insert into tb values('a',    2,   'a2(a的第二个值)')
insert into tb values('a',    1,   'a1--a的第一个值')
insert into tb values('a',    1,   'a1--a的第一个值')
insert into tb values('a',    3,   'a3:a的第三个值')
insert into tb values('a',    3,   'a3:a的第三个值')
insert into tb values('b',    1,   'b1--b的第一个值')
insert into tb values('b',    3,   'b3:b的第三个值')
insert into tb values('b',    2,   'b2b2b2b2')
insert into tb values('b',    4,   'b4b4')
insert into tb values('b',    5,   'b5b5b5b5b5')
go

select * , px = identity(int,1,1) into tmp from tb

select m.name,m.val,m.memo from
(
select t.* from tmp t where val = (select min(val) from tmp where name = t.name)
) m where px = (select min(px) from
(
select t.* from tmp t where val = (select min(val) from tmp where name = t.name)
) n where n.name = m.name)

drop table tb,tmp

/*
name       val         memo
---------- ----------- --------------------
a          1           a1--a的第一个值
b          1           b1--b的第一个值

(2 行受影响)
*/
--在sql server 2005中可以使用row_number函数，不需要使用临时表。
--创建表并插入数据：
create table tb(name varchar(10),val int,memo varchar(20))
insert into tb values('a',    2,   'a2(a的第二个值)')
insert into tb values('a',    1,   'a1--a的第一个值')
insert into tb values('a',    1,   'a1--a的第一个值')
insert into tb values('a',    3,   'a3:a的第三个值')
insert into tb values('a',    3,   'a3:a的第三个值')
insert into tb values('b',    1,   'b1--b的第一个值')
insert into tb values('b',    3,   'b3:b的第三个值')
insert into tb values('b',    2,   'b2b2b2b2')
insert into tb values('b',    4,   'b4b4')
insert into tb values('b',    5,   'b5b5b5b5b5')
go

select m.name,m.val,m.memo from
(
select * , px = row_number() over(order by name , val) from tb
) m where px = (select min(px) from
(
select * , px = row_number() over(order by name , val) from tb
) n where n.name = m.name)

drop table tb

/*
name       val         memo
---------- ----------- --------------------
a          1           a1--a的第一个值
b          1           b1--b的第一个值

(2 行受影响)
*/