How to optimize huge query with repeated subquerie

2019-05-21 13:50发布

问题:

I have the following huge query that contains repeated subqueries , It looks really inefficient to me. How can i optimize it ?

SELECT T2.date1, T2.date2, T2.period, T1.market, T1.ticker, 0 AS scenario
FROM
(SELECT  DISTINCT
        Q.market AS market,
        Q.ticker AS ticker

FROM portfolio.scenario S RIGHT JOIN portfolio.quote Q
ON    S.series =  (SELECT S.series
                  FROM scenario S
                  WHERE S.date1 >= '2009-09-01' AND
                        S.date2 <= '2010-07-01' AND
                        S.period = 'QUARTER'
                  ORDER BY S.date2
                  LIMIT 1) AND
      Q.market = S.market AND 
      Q.ticker = S.ticker

WHERE  Q.date = '2010-07-01' AND
       S.date1 IS NULL) AS T1

JOIN 

(SELECT DISTINCT S.date1, S.date2, S.period
FROM scenario S
WHERE S.series = (SELECT S.series
                  FROM scenario S
                  WHERE S.date1 >= '2009-09-01' AND
                        S.date2 <= '2010-07-01' AND
                        S.period = 'QUARTER'
                  ORDER BY S.date2
                  LIMIT 1) AND
      S.date1 >= '2009-09-01' AND
      S.date2 <= '2010-07-01') AS T2

UNION

SELECT  S.date1 AS date1, 
        S.date2 AS date2,
        S.period AS period,
        Q.market AS market,
        Q.ticker AS ticker,
        Q.close * EXP(S.ratio) AS scenario

FROM portfolio.scenario S , portfolio.quote Q

WHERE  S.series = (SELECT S.series
                  FROM scenario S
                  WHERE S.date1 >= '2009-09-01' AND
                        S.date2 <= '2010-07-01' AND
                        S.period = 'QUARTER'
                  ORDER BY S.date2
                  LIMIT 1) AND
      S.date1 >= '2009-09-01' AND
      S.date2 <= '2010-07-01' AND
      Q.date = '2010-07-01' AND
      Q.market = S.market AND 
      Q.ticker = S.ticker

UNION

SELECT T2.date1, T2.date2, T2.period, T1.market, T1.ticker, 0 AS scenario
FROM
(SELECT  DISTINCT
        Q.market AS market,
        Q.ticker AS ticker

        FROM portfolio.scenario S , portfolio.quote Q
        WHERE  Q.date = '2010-07-01' AND
              Q.market = S.market AND 
              Q.ticker = S.ticker AND
              S.series = (SELECT S.series
                  FROM scenario S
                  WHERE S.date1 >= '2009-09-01' AND
                        S.date2 <= '2010-07-01' AND
                        S.period = 'QUARTER'
                  ORDER BY S.date2
                  LIMIT 1) AND
              S.date1 >= '2009-09-01' AND
              S.date2 <= '2010-07-01' ) AS T1

JOIN 

(SELECT DISTINCT S.date1, S.date2, S.period
FROM scenario S
WHERE S.series = (SELECT S.series
                  FROM scenario S
                  WHERE S.date1 >= '2009-09-01' AND
                        S.date2 <= '2010-07-01' AND
                        S.period = 'QUARTER'
                  ORDER BY S.date2
                  LIMIT 1) AND
      S.date1 >= '2009-09-01' AND
      S.date2 <= '2010-07-01') AS T2


WHERE (T2.date1, T2.date2, T2.period, T1.market, T1.ticker)
        NOT IN (SELECT  S.date1 AS date1, 
                S.date2 AS date2,
                S.period AS period,
                Q.market AS market,
                Q.ticker AS ticker

        FROM portfolio.scenario S , portfolio.quote Q
        WHERE  Q.date = '2010-07-01' AND
              Q.market = S.market AND 
              Q.ticker = S.ticker AND
              S.series = (SELECT S.series
                  FROM scenario S
                  WHERE S.date1 >= '2009-09-01' AND
                        S.date2 <= '2010-07-01' AND
                        S.period = 'QUARTER'
                  ORDER BY S.date2
                  LIMIT 1) AND
              S.date1 >= '2009-09-01' AND
              S.date2 <= '2010-07-01' )


ORDER BY 
date1,date2,period,market,ticker

After @Bruce 's comment and some logic to reduce a subquery my query now is:

(SELECT S.date1, 
        S.date2,
        S.period,
        Q.market,
        Q.ticker,
        Q.close * EXP(S.ratio) AS scenario

FROM portfolio.scenario S , portfolio.quote Q

WHERE  
      S.date1 >= (@date1 := '2009-09-01') AND
      S.date2 <= (@date2 := '2010-07-01') AND
      Q.date = (@qdate := '2010-07-01') AND
      S.series = 
      (@series := 
                  (SELECT S.series
                  FROM scenario S
                  WHERE S.date1 >= '2009-09-01' AND
                        S.date2 <= '2010-07-01' AND
                        S.period = 'QUARTER'
                  ORDER BY S.date2
                  LIMIT 1)) AND
      Q.market = S.market AND 
      Q.ticker = S.ticker)

UNION

(SELECT T2.date1, T2.date2, T2.period, T1.market, T1.ticker, 0 AS scenario
FROM
(SELECT Q.market, Q.ticker
 FROM quote Q
 WHERE Q.date = @qdate) AS T1

JOIN 

(SELECT DISTINCT S.date1, S.date2, S.period
FROM scenario S
WHERE S.series = @series AND
      S.date1 >= @date1 AND
      S.date2 <= @date2) AS T2

WHERE (T2.date1, T2.date2, T2.period, T1.market, T1.ticker)
        NOT IN 

        (SELECT  S.date1,
                 S.date2,
                 S.period,
                 Q.market,
                 Q.ticker
        FROM portfolio.scenario S , portfolio.quote Q
        WHERE  Q.date = @qdate AND
               Q.market = S.market AND 
               Q.ticker = S.ticker AND
               S.series = @series AND
               S.date1 >= @date1 AND
               S.date2 <= @date2 ))

However, If i changed

  (@series := 
              (SELECT S.series
              FROM scenario S
              WHERE S.date1 >= '2009-09-01' AND
                    S.date2 <= '2010-07-01' AND
                    S.period = 'QUARTER'
              ORDER BY S.date2
              LIMIT 1))

to be

  (@series := 
              (SELECT S.series
              FROM scenario S
              WHERE S.date1 >= @date1 AND
                    S.date2 <= @date2 AND
                    S.period = 'QUARTER'
              ORDER BY S.date2
              LIMIT 1))

It takes too much time to process it (i have executed the query 10 mins ago and still did not get the result) while the query normally returns in 5 seconds.

Also when i reset the variables , execute the result is not correct (probably use the variable's value from the previous execution). How can i change that without adding SET statements (I would like it to be a single query)

回答1:

Use MySQL variables:

SELECT
    @x := ColumnName,
    @y := ColumnName2 + @z,
    @z := (SELECT * FROM SubTable WHERE x = @x),
    (SELECT * FROM Table2 WHERE X = @z),
    (SELECT * FROM Table3 WHERE X = @z)
FROM Table
WHERE
    v = @v
  • You can assign subselect and column values to SQL variables
  • You can refer to these variables anywhere in the statement
  • The variables contain their value from previous rows (if set)
  • You can reuse subselects and other values in this way