T-SQL COALESCE GROUPING SETS into single column wi

2019-07-06 17:10发布

问题:

A thesaurus database where terms and categories are linked to each other and running SQL Server 2008. Based on this and this answers. Here is a sample:

CREATE TABLE #term (termid VARCHAR(8), en VARCHAR(32), enscope VARCHAR(32))
CREATE TABLE #link (linkid VARCHAR(10), termid VARCHAR(8), reltype VARCHAR(2), refid VARCHAR(8))
CREATE TABLE #categorylink (code VARCHAR(3), termid VARCHAR(8))

INSERT INTO #term VALUES ('100', 'ABC', 'abc_scopenote')
INSERT INTO #term VALUES ('120', 'DEF', 'def_scopenote')
INSERT INTO #term VALUES ('150', 'GHI', NULL)

INSERT INTO #link VALUES ('1', '100', 'NT', '120')
INSERT INTO #link VALUES ('2', '100', 'NT', '150')
INSERT INTO #link VALUES ('3', '120', 'BT', '100')
INSERT INTO #link VALUES ('4', '120', 'RT', '150')
INSERT INTO #link VALUES ('5', '150', 'BT', '100')
INSERT INTO #link VALUES ('6', '150', 'RT', '120')

INSERT INTO #categorylink VALUES ('S01', '100')
INSERT INTO #categorylink VALUES ('S02', '100')
INSERT INTO #categorylink VALUES ('B04', '150')

SELECT
       CASE
       WHEN #term.enscope IS NULL AND refterm.en IS NULL AND #categorylink.code IS NULL
       THEN #term.en
       ELSE NULL
       END,
       CHAR(9) + 'SN ' + #term.enscope,
       CHAR(9) + #link.reltype + CHAR(32) + refterm.en,
       CHAR(9) + 'CODE ' + #categorylink.code
    FROM #link
        INNER JOIN #term ON #term.termid = #link.termid
        INNER JOIN #term AS refterm ON refterm.termid = #link.refid
        LEFT JOIN #categorylink ON #term.termid = #categorylink.termid

    GROUP BY GROUPING SETS (#term.en, (#term.en, #term.enscope), (#term.en, #link.linkid, #link.reltype, refterm.en), (#term.en, #categorylink.code))
    ORDER BY #term.en, #categorylink.code, #link.linkid, #term.enscope
GO

DROP TABLE #term
DROP TABLE #link
DROP TABLE #categorylink
GO

If there is NULL in 'enscope' I've got a duplicate row.

If there is no 'categorylink' value I've got a duplicate row.

How to avoid this, please?

I want to COALESCE them all into a single column without duplicates.

; WITH CTEterm AS (
    SELECT
       ROW_NUMBER() OVER (PARTITION BY #term.en, refterm.en ORDER BY #term.en) AS rownumber,
       #term.en AS mainterm,
       CHAR(9) + 'SN ' + #term.enscope AS scopenote,
       CHAR(9) + #link.reltype + CHAR(32) + refterm.en AS subterms,
       CHAR(9) + 'CODE ' + #categorylink.code AS codes
    FROM #link
       INNER JOIN #term ON #term.termid = #link.termid
       INNER JOIN #term AS refterm ON refterm.termid = #link.refid
       LEFT JOIN #categorylink ON #term.termid = #categorylink.termid
)

SELECT COALESCE(
    CASE
    WHEN rownumber = 1
    THEN mainterm
    ELSE NULL
    END,
    scopenote,
    subterms,
    codes
)
FROM CTEterm
GROUP BY GROUPING SETS ((mainterm, rownumber), (mainterm, scopenote), (mainterm, subterms), (mainterm, codes))
ORDER BY mainterm, codes, subterms, scopenote

GO

Basically how to avoid using 'ELSE NULL' in CASE (like 'else skip row')?

This is what I get using COALESCE

ABC
NULL
    SN abc_scopenote
    NT DEF
    NT GHI
    CODE S01
    CODE S02
NULL
DEF
    SN def_scopenote
    BT ABC
    RT GHI
NULL
GHI
    BT ABC
    RT DEF
    CODE B04

This is what I need

ABC
    SN abc_scopenote
    NT DEF
    NT GHI
    CODE S01
    CODE S02
DEF
    SN def_scopenote
    BT ABC
    RT GHI
GHI
    BT ABC
    RT DEF
    CODE B04

Same question here

回答1:

Sorry if this turns out not really what you expected, but if you simply need to get rid of NULLs then I fail to see why you can't just do like this:

;WITH CTEterm AS (
    SELECT
       ROW_NUMBER() OVER (PARTITION BY #term.en, refterm.en
                              ORDER BY #term.en) AS rownumber,
       #term.en AS mainterm,
       CHAR(9) + 'SN ' + #term.enscope AS scopenote,
       CHAR(9) + #link.reltype + CHAR(32) + refterm.en AS subterms,
       CHAR(9) + 'CODE ' + #categorylink.code AS codes
    FROM #link
       INNER JOIN #term ON #term.termid = #link.termid
       INNER JOIN #term AS refterm ON refterm.termid = #link.refid
       LEFT JOIN #categorylink ON #term.termid = #categorylink.termid
)
SELECT
  AggValue
FROM (
  SELECT
    mainterm, codes, subterms, scopenote,
    COALESCE(
      CASE WHEN rownumber = 1 THEN mainterm ELSE NULL END,
      scopenote,
      subterms,
      codes
    ) AS AggValue
  FROM CTEterm
  GROUP BY GROUPING SETS ((mainterm, rownumber), (mainterm, scopenote),
                          (mainterm, subterms), (mainterm, codes))
) s
WHERE AggValue IS NOT NULL
ORDER BY mainterm, codes, subterms, scopenote

Note: ELSE NULL is removed here only because it changes nothing (NULL is implied when there's no ELSE), not because you would gain anything from removing it.