优化子查询的性能(Optimize performance of sub-queries)

2019-11-01 21:56发布

我的第一个查询(直接从表中取数)大约需要 40 秒,生成超过 80,000 行。 我想分别按站点(Site)、按序列(Sequence)、按总计,以及不限操作系统版本这几个维度,统计 Windows 7 上各应用程序的安装数量。

下面这些子查询能得到正确结果,但显然大大拖慢了整个过程:整条查询运行了 3.5 小时才完成。

有没有更有效的方式来做到这一点?

输出:

SoftwareName    Sequence    Site    Win7/site       Win7Installs/seq    TotWin7apps TotalInstalls
Adobe Acrobat       1       BKN         1                   5               626         7854
AutoCAD LT          1       BKN         1                   1               3           15
Adobe Acrobat       1       CTW         4                   5               626         7854
Adobe Captivate     1       CTW         1                   1               8           60

查询:

-- Original (slow) query from the question: one output row per distinct
-- (SoftwareName, Sequence, Site), with four install counts that are each
-- computed by a correlated scalar subquery over the de-duplicated install list.
WITH PCapps AS (
    -- One row per distinct (Computer, Sequence, Site, SoftwareName, OS).
    SELECT DISTINCT
        Computer,
        Sequence,
        Site,
        SoftwareName,
        OS
    FROM table
)
SELECT DISTINCT
    pc.SoftwareName,
    pc.Sequence,
    pc.Site,
    -- Windows 7 installs of this software at this site.
    (
        SELECT COUNT(bySite.SoftwareName)
        FROM PCapps AS bySite
        WHERE bySite.SoftwareName = pc.SoftwareName
          AND bySite.Site = pc.Site
          AND bySite.OS = 'Windows 7 Enterprise'
    ) AS [Win7/site],
    -- Windows 7 installs of this software in this sequence.
    (
        SELECT COUNT(bySeq.SoftwareName)
        FROM PCapps AS bySeq
        WHERE bySeq.SoftwareName = pc.SoftwareName
          AND bySeq.Sequence = pc.Sequence
          AND bySeq.OS = 'Windows 7 Enterprise'
    ) AS [Win7Installs/seq],
    -- Windows 7 installs of this software across all sites and sequences.
    (
        SELECT COUNT(win7.SoftwareName)
        FROM PCapps AS win7
        WHERE win7.SoftwareName = pc.SoftwareName
          AND win7.OS = 'Windows 7 Enterprise'
    ) AS TotWin7apps,
    -- Installs of this software on any OS.
    (
        SELECT COUNT(anyOs.SoftwareName)
        FROM PCapps AS anyOs
        WHERE anyOs.SoftwareName = pc.SoftwareName
    ) AS TotalInstalls
FROM PCapps AS pc

**更新:**

根据 @Jason Carter 的提示,我创建了几个 #TEMP 临时表,然后把它们连接起来。 结果运行得非常快,不到一分钟就完成了(比最初的那个查询长不了多少)。 这种做法与 @JasonCarter 的解答略有不同,因为我依据的是他最初给出的"创建 #TEMP 表"的提示:我创建了几个 #TEMP 表,每个表各保存一种 COUNT() 结果。

-- Materialize one row per distinct (Computer, Sequence, Site, SoftwareName, OS)
-- into the temp table #PCapps.
SELECT DISTINCT
    src.Computer,
    src.Sequence,
    src.Site,
    src.SoftwareName,
    src.OS
INTO #PCapps
FROM TABLE AS src;

-- Windows 7 install count per (Site, SoftwareName), stored in #SiteInstalls.
SELECT
    pc.SoftwareName,
    pc.Site,
    COUNT(pc.SoftwareName) AS SiteInstalls
INTO #SiteInstalls
FROM #PCapps AS pc
WHERE pc.OS = 'Windows 7 Enterprise'
GROUP BY
    pc.Site,
    pc.SoftwareName;

-- Windows 7 install count per (Sequence, SoftwareName), stored in #SeqInstalls.
SELECT
    pc.SoftwareName,
    pc.Sequence,
    COUNT(pc.SoftwareName) AS SeqInstalls
INTO #SeqInstalls
FROM #PCapps AS pc
WHERE pc.OS = 'Windows 7 Enterprise'
GROUP BY
    pc.Sequence,
    pc.SoftwareName;

-- Windows 7 install count per SoftwareName, stored in #Win7Installs.
SELECT
    pc.SoftwareName,
    COUNT(pc.SoftwareName) AS Win7Installs
INTO #Win7Installs
FROM #PCapps AS pc
WHERE pc.OS = 'Windows 7 Enterprise'
GROUP BY pc.SoftwareName;

-- Install count per SoftwareName on any OS, stored in #AppInstalls.
SELECT
    pc.SoftwareName,
    COUNT(pc.SoftwareName) AS AppInstalls
INTO #AppInstalls
FROM #PCapps AS pc
GROUP BY pc.SoftwareName;

-- Final report: one row per distinct (SoftwareName, Sequence, Site) with the
-- four pre-aggregated install counts attached.
--
-- DISTINCT is required because #PCapps holds one row per computer; without it
-- each (SoftwareName, Sequence, Site) combination is repeated once per
-- computer/OS, unlike the original SELECT DISTINCT query.
--
-- COALESCE(..., 0) mirrors the original COUNT() subqueries, which return 0
-- (not NULL) when a software title has no matching installs.
SELECT DISTINCT
    pc.SoftwareName,
    pc.Sequence,
    pc.Site,
    COALESCE(sit7.SiteInstalls, 0) AS SiteInstalls,
    COALESCE(seq7.SeqInstalls, 0)  AS SeqInstalls,
    COALESCE(w7.Win7Installs, 0)   AS Win7Installs,
    COALESCE(ai.AppInstalls, 0)    AS AppInstalls
FROM #PCapps AS pc
LEFT JOIN #SiteInstalls AS sit7
    ON sit7.SoftwareName = pc.SoftwareName
   AND sit7.Site = pc.Site
LEFT JOIN #SeqInstalls AS seq7
    ON seq7.SoftwareName = pc.SoftwareName
   AND seq7.Sequence = pc.Sequence
LEFT JOIN #Win7Installs AS w7
    ON w7.SoftwareName = pc.SoftwareName
LEFT JOIN #AppInstalls AS ai
    ON ai.SoftwareName = pc.SoftwareName;

-- Clean up the temp tables created by this script (aggregates first, then the
-- base install list they were built from).
DROP TABLE #AppInstalls;
DROP TABLE #Win7Installs;
DROP TABLE #SeqInstalls;
DROP TABLE #SiteInstalls;
DROP TABLE #PCapps;

Answer 1:

我会先把子查询的结果写入一个临时表来收集各项计数,然后再对这些计数求和得到总数。 按照你现在的写法,PCapps 的每一行都要把每个子查询各执行一次,这就是它耗时这么长的原因。

可以试试下面这种写法:

-- Sample data for the demo: one row per (computer, software) install.
DECLARE @t TABLE (
    Computer     varchar(10),
    Sequence     int,
    Site         varchar(3),
    SoftwareName varchar(20),
    OS           varchar(20)
);

INSERT INTO @t (Computer, Sequence, Site, SoftwareName, OS)
VALUES
    ('C1', 1, 'BKN', 'Adobe Acrobat', 'Win7'),
    ('C2', 1, 'BKN', 'Adobe Acrobat', 'Win7'),
    ('C3', 1, 'BKN', 'Adobe Acrobat', 'Win7'),
    ('C1', 1, 'BKN', 'AutoCAD LT ', 'Win7'),
    ('C3', 1, 'BKN', 'AutoCAD LT ', 'Win7'),
    ('B1', 2, 'CTW', 'Adobe Acrobat', 'Win7'),
    ('B2', 2, 'CTW', 'Adobe Acrobat', 'Win7'),
    ('B3', 3, 'CTW', 'Adobe LT', 'Win7'),
    ('B4', 3, 'CTW', 'Adobe Acrobat', 'Win7'),
    ('A1', 2, 'CTW', 'Adobe Acrobat', 'Win7'),
    ('A2', 2, 'CTW', 'Adobe LT', 'Win7'),
    ('A3', 2, 'CTW', 'Adobe Acrobat', 'Win7'),
    ('X4', 3, 'CTW', 'Adobe Acrobat', 'WinXP'),
    ('X1', 2, 'CTW', 'Adobe Acrobat', 'WinXP'),
    ('X2', 2, 'CTW', 'Adobe LT', 'WinXP'),
    ('X3', 2, 'CTW', 'Adobe Acrobat', 'WinXP'),
    ('A4', 2, 'CTW', 'Adobe Acrobat', 'Win7');

-- Pre-aggregate install counts at the finest grain,
-- (Site, OS, SoftwareName, Sequence), into #SiteSpecific.
SELECT
    t.Site,
    t.OS,
    t.SoftwareName,
    t.Sequence,
    COUNT(t.SoftwareName) AS [Count]
INTO #SiteSpecific
FROM @t AS t
GROUP BY
    t.Site,
    t.OS,
    t.SoftwareName,
    t.Sequence;


-- Report for the 'Win7' rows of the sample data: one row per distinct
-- (SoftwareName, Sequence, Site) with install counts obtained by rolling up
-- the pre-aggregated #SiteSpecific rows at four different levels.
--
-- The per-site rollup is exposed as [Win7/site]. TotWin7apps comes from its
-- own per-(SoftwareName, OS) rollup, so it really is the all-sites total for
-- the row's OS rather than a relabelled per-site count.
WITH PCapps AS (
    -- One row per distinct (Computer, Sequence, Site, SoftwareName, OS).
    SELECT DISTINCT
        Computer,
        Sequence,
        Site,
        SoftwareName,
        OS
    FROM @t
)
SELECT DISTINCT
    pc.SoftwareName,
    pc.Sequence,
    pc.Site,
    PerSite.[Count] AS [Win7/site],
    PerSeq.[Count]  AS [Win7Installs/seq],
    PerOS.[Count]   AS TotWin7apps,
    Total.[Count]   AS TotalInstalls
FROM PCapps AS pc
-- Installs per software, sequence and OS.
INNER JOIN (
    SELECT SoftwareName, Sequence, OS, SUM([Count]) AS [Count]
    FROM #SiteSpecific
    GROUP BY SoftwareName, Sequence, OS
) AS PerSeq
    ON PerSeq.SoftwareName = pc.SoftwareName
   AND PerSeq.Sequence = pc.Sequence
   AND PerSeq.OS = pc.OS
-- Installs per software, site and OS.
INNER JOIN (
    SELECT SoftwareName, Site, OS, SUM([Count]) AS [Count]
    FROM #SiteSpecific
    GROUP BY SoftwareName, Site, OS
) AS PerSite
    ON PerSite.SoftwareName = pc.SoftwareName
   AND PerSite.Site = pc.Site
   AND PerSite.OS = pc.OS
-- Installs per software and OS, across all sites and sequences.
INNER JOIN (
    SELECT SoftwareName, OS, SUM([Count]) AS [Count]
    FROM #SiteSpecific
    GROUP BY SoftwareName, OS
) AS PerOS
    ON PerOS.SoftwareName = pc.SoftwareName
   AND PerOS.OS = pc.OS
-- Installs per software on any OS.
INNER JOIN (
    SELECT SoftwareName, SUM([Count]) AS [Count]
    FROM #SiteSpecific
    GROUP BY SoftwareName
) AS Total
    ON Total.SoftwareName = pc.SoftwareName
WHERE pc.OS = 'Win7'
ORDER BY
    pc.SoftwareName,
    pc.Sequence,
    pc.Site;


Answer 2:

说来奇怪……优化器会利用并行执行,使这些子查询的效率相当高。 我在一张测试表中填充了 12 万条记录,并尝试了下面三个查询。 第三个查询只比第一个(也就是你的)稍微高效一点,代价却是写法更复杂。 所以我会保持你的查询原样不动。 也许存在更好的方案,但在我看来你的写法已经足够好了。 你处理这 8 万行实际需要多长时间?

-- Query 1 of the comparison: the question's correlated-subquery form, run
-- against the test table pcapps.
WITH PCapps2 AS (
    -- One row per distinct (Computer, Sequence, Site, SoftwareName, OS).
    SELECT DISTINCT
        Computer,
        Sequence,
        Site,
        SoftwareName,
        OS
    FROM pcapps
)
SELECT DISTINCT
    pc.SoftwareName,
    pc.Sequence,
    pc.Site,
    -- Windows 7 installs of this software at this site.
    (
        SELECT COUNT(bySite.SoftwareName)
        FROM PCapps2 AS bySite
        WHERE bySite.SoftwareName = pc.SoftwareName
          AND bySite.Site = pc.Site
          AND bySite.OS = 'Windows 7 Enterprise'
    ) AS [Win7/site],
    -- Windows 7 installs of this software in this sequence.
    (
        SELECT COUNT(bySeq.SoftwareName)
        FROM PCapps2 AS bySeq
        WHERE bySeq.SoftwareName = pc.SoftwareName
          AND bySeq.Sequence = pc.Sequence
          AND bySeq.OS = 'Windows 7 Enterprise'
    ) AS [Win7Installs/seq],
    -- Windows 7 installs of this software across all sites and sequences.
    (
        SELECT COUNT(win7.SoftwareName)
        FROM PCapps2 AS win7
        WHERE win7.SoftwareName = pc.SoftwareName
          AND win7.OS = 'Windows 7 Enterprise'
    ) AS TotWin7apps,
    -- Installs of this software on any OS.
    (
        SELECT COUNT(anyOs.SoftwareName)
        FROM PCapps2 AS anyOs
        WHERE anyOs.SoftwareName = pc.SoftwareName
    ) AS TotalInstalls
FROM PCapps2 AS pc

-- Query 2 of the comparison: the same four counts computed with windowed
-- aggregates over the de-duplicated install list instead of subqueries.
-- The leading semicolon terminates whatever statement precedes the CTE.
;WITH PCapps2 AS (
    -- One row per distinct (Computer, Sequence, Site, SoftwareName, OS).
    SELECT DISTINCT
        Computer,
        Sequence,
        Site,
        SoftwareName,
        OS
    FROM pcapps
)
SELECT DISTINCT
    pc.SoftwareName,
    pc.Sequence,
    pc.Site,
    -- CASE yields NULL for non-Windows-7 rows, which COUNT ignores.
    COUNT(CASE WHEN pc.OS = 'Windows 7 Enterprise' THEN 1 END)
        OVER (PARTITION BY pc.SoftwareName, pc.Site)     AS [Win7/site],
    COUNT(CASE WHEN pc.OS = 'Windows 7 Enterprise' THEN 1 END)
        OVER (PARTITION BY pc.SoftwareName, pc.Sequence) AS [Win7Installs/seq],
    COUNT(CASE WHEN pc.OS = 'Windows 7 Enterprise' THEN 1 END)
        OVER (PARTITION BY pc.SoftwareName)              AS TotWin7apps,
    COUNT(*)
        OVER (PARTITION BY pc.SoftwareName)              AS TotalInstalls
FROM PCapps2 AS pc

-- Query 3 of the comparison: each count is aggregated once in its own CTE and
-- then left-joined onto the distinct (softwarename, sequence, site) keys.
-- The leading semicolon terminates whatever statement precedes the CTE.
;WITH PCapps2 AS (
    -- One row per distinct (Computer, Sequence, Site, SoftwareName, OS).
    SELECT DISTINCT
        Computer,
        Sequence,
        Site,
        SoftwareName,
        OS
    FROM pcapps
),
win7_per_site AS (
    -- Windows 7 installs per (softwarename, site).
    SELECT
        softwarename,
        site,
        COUNT(*) AS [Win7/site]
    FROM PCapps2
    WHERE os = 'Windows 7 Enterprise'
    GROUP BY softwarename, site
),
win7_per_seq AS (
    -- Windows 7 installs per (softwarename, sequence).
    SELECT
        softwarename,
        sequence,
        COUNT(*) AS [Win7Installs/seq]
    FROM PCapps2
    WHERE os = 'Windows 7 Enterprise'
    GROUP BY softwarename, sequence
),
per_software AS (
    -- Windows 7 installs and all-OS installs per softwarename.
    SELECT
        softwarename,
        COUNT(CASE WHEN os = 'Windows 7 Enterprise' THEN 1 END) AS TotWin7apps,
        COUNT(*) AS TotalInstalls
    FROM PCapps2
    GROUP BY softwarename
),
report_keys AS (
    -- The distinct output keys.
    SELECT DISTINCT
        softwarename,
        sequence,
        site
    FROM PCapps2
)
SELECT
    k.softwarename,
    k.sequence,
    k.site,
    -- Unmatched left joins produce NULL; report those as 0.
    ISNULL(ws.[Win7/site], 0)        AS [Win7/site],
    ISNULL(wq.[Win7Installs/seq], 0) AS [Win7Installs/seq],
    ISNULL(ps.TotWin7apps, 0)        AS TotWin7apps,
    ISNULL(ps.TotalInstalls, 0)      AS TotalInstalls
FROM report_keys AS k
LEFT JOIN win7_per_site AS ws
    ON k.softwarename = ws.softwarename
   AND k.site = ws.site
LEFT JOIN win7_per_seq AS wq
    ON k.softwarename = wq.softwarename
   AND k.sequence = wq.sequence
LEFT JOIN per_software AS ps
    ON k.softwarename = ps.softwarename;

第一个查询:表 "pcapps"。 扫描计数 15,逻辑读取 6630 次,物理读取 0 次,预读 0 次,lob 逻辑读取 0 次,lob 物理读取 0 次,lob 预读 0 次。 表 "Worktable"。 扫描计数 0,逻辑读取 0 次,物理读取 0 次,预读 0 次,lob 逻辑读取 0 次,lob 物理读取 0 次,lob 预读 0 次。

第二个查询:表 "pcapps"。 扫描计数 3,逻辑读取 1326 次,物理读取 0 次,预读 0 次,lob 逻辑读取 0 次,lob 物理读取 0 次,lob 预读 0 次。 表 "Worktable"。 扫描计数 18,逻辑读取 1983591 次,物理读取 0 次,预读 0 次,lob 逻辑读取 0 次,lob 物理读取 0 次,lob 预读 0 次。

第三个查询:表 "pcapps"。 扫描计数 12,逻辑读取 5304 次,物理读取 0 次,预读 0 次,lob 逻辑读取 0 次,lob 物理读取 0 次,lob 预读 0 次。 表 "Worktable"。 扫描计数 0,逻辑读取 0 次,物理读取 0 次,预读 0 次,lob 逻辑读取 0 次,lob 物理读取 0 次,lob 预读 0 次。



文章来源: Optimize performance of sub-queries