可以将文章内容翻译成中文,广告屏蔽插件可能会导致该功能失效(如失效,请关闭广告屏蔽插件后再试):
问题:
I'm writing a query like this:
select * from myTable where X in (select X from Y) and XX in (select X from Y)
Values from columns X and XX has to be in the result of the same query: select X from Y
.
I think that this query is invoked twice so its senseless. Is there any other option I can write this query more efficiently? Maybe temp table?
回答1:
Actually no, there isn't a smarter way to write this (without visiting Y twice) given the X that myTable.X and myTable.YY matches to may not be from the same row.
As an alternative, the EXISTS form of the query is
select *
from myTable A
where exists (select * from Y where A.X = Y.X)
and exists (select * from Y where A.XX = Y.X)
If Y contains X values of 1,2,3,4,5
, and x.x = 2
and x.xx = 4
, they both exist (on different records in Y) and the record from myTable should be shown in output.
EDIT: This answer previously stated that You could rewrite this using _EXISTS_ clauses which will work faster than _IN_
. AS Martin has pointed out, this is not true (certainly not for SQL Server 2005 and above). See links
- http://explainextended.com/2009/06/16/in-vs-join-vs-exists/
- http://sqlinthewild.co.za/index.php/2009/08/17/exists-vs-in/
回答2:
It will probably not be particularly efficient to try to write this query by only referencing Y
once. However, given that you are using SQL Server 2008, there are variations that can be used:
Select ...
From MyTable As T
Where Exists (
Select 1
From Y
Where Y.X = T.X
Intersect
Select 1
From Y
Where Y.X = T.XX
)
Addition
Actually, I can think of a way you could do it without using Y
more than once (Nothing was said about using MyTable
more than once). However, this is more for academic reasons as I think that using my first solution will likely perform better:
Select ...
From MyTable As T
Where Exists (
Select 1
From Y
Where Exists(
Select 1
From MyTable1 As T1
Where T1.X = Y.X
Intersect
Select 1
From MyTable1 As T2
Where T2.XX = Y.X
)
And Y.X In(T.X, T.XX)
)
回答3:
WITH
w_tmp AS(
SELECT x
FROM y
)
SELECT *
FROM myTable
WHERE x IN (SELECT x FROM w_tmp)
AND xx IN (SELECT x FROM w_tmp)
(I've read this in Oracle docs, but I think MS able to do this optimizations too)
This way optimizer knows for sure that you are doing same query and can create temporary table to cash results (But it's still up to optimizer to decide whether it's worth it. For tiny queries, overhead of creating temp table can be too high).
Also (and actually this is way more important for me), when subquery is 50 lines, it's easier for human to see, that the same thing is used in both cases. Pretty much like factoring long functions into subroutines
Docs on MSDN
回答4:
Not sure what the problem is but isn't simple JOIN an answer?
SELECT t.*
FROM myTable
JOIN Y y1 ON y1.X = myTable.X
JOIN Y y2 ON y2.X = myTable.XX
or
SELECT t.*
FROM myTable, Y y1, Y y2
WHERE y1.X = myTable.X AND y2.X = myTable.XX
ADDED: if there is a strong need to eliminate a second query for Y, let's reverse the logic:
;WITH A(X)
AS (
-- this will select all values that can be found in Y and myTable X and XX fields.
SELECT Y.X -- if there are a lot of dups, add DISTINCT
FROM Y, myTable
WHERE Y.X IN (myTable.X, myTableXX)
)
-- now join back to the orignal table and filter.
SELECT t.*
FROM myTable
-- similar to what has been mentioned before
WHERE EXISTS(SELECT TOP 1 * from A where A.X = myTable.X)
AND EXISTS(SELECT TOP 1 * from A where A.X = myTable.XX)
If you don't like WITH, you may use SELECT INTO clause and create in-memory table.