Select unique column names from a list of joined t

2020-02-15 05:48发布

问题:

I have a list of tables that can be joined together by the same PK column. Since this list of tables can vary from project to project, I want to create a query that can be dynamic enough to pull all unique columns from these tables.

For example, I have three tables below:
Table A (PK field, column1, column 2)
Table B (PK field, column3, column 4)
Table C (PK field, column5, column 5)

These three tables are joined on "PK field" column, and I want the query output to be something like:

PK field  column1  column2  column3  column4  column5
..data..  ..data.. ..data.. ..data.. ..data.. ..data..

At the end, this query will be part of a SQL function or SP, so the user can define a list of tables, and PK field at the beginning, then executing it shall return my expected output with dataset.

I think about use this query below but the result is not what I like:

SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = ''

Any advice about how I should design this SP or function, will be appreciated.

Thanks in advance.

DDL for two example tables:

CREATE TABLE [dbo].[G_bDEM](
    [blaiseKey_code] [nvarchar](255) NULL,
    [qSex] [int] NULL,
    [qDOB] [datetime] NULL,
    [qDOBNR] [int] NULL,
    [qAge] [int] NULL,
    [qAgeNR] [int] NULL,
    [qAgeRange] [int] NULL,
    [qAge15OrOver] [int] NULL,
    [qNotEligible] [nvarchar](1) NULL,
    [qBornInNZ] [int] NULL,
    [qCountryOfBirth] [nvarchar](2) NULL,
    [qArriveNZYr] [int] NULL,
    [qArriveNZYrNR] [int] NULL,
    [qArriveNZMth] [int] NULL,
    [bDEM_BOP_qHowManyRaised] [int] NULL,
    [bDEM_BOP_q1stParentBornNZ] [int] NULL,
    [bDEM_BOP_q2ndParentBornNZ] [int] NULL,
    [bDEM_BOP_qHowManyParentBornNZ] [int] NULL,
    [qMaoriDescent] [int] NULL,
    [qSchQual] [int] NULL,
    [qSchQualOth] [nvarchar](200) NULL,
    [qSchQualOthNR] [int] NULL,
    [qSchQualYr] [int] NULL,
    [qSchQualYrNR] [int] NULL,
    [qPostSchQual] [int] NULL,
    [q3MthsStudy] [int] NULL,
    [qHighestQual] [int] NULL,
    [qHighestQualOth] [nvarchar](200) NULL,
    [qHighestQualOthNR] [int] NULL,
    [qHighestQualYr] [int] NULL,
    [qHighestQualYrNR] [int] NULL,
    [qWorkIntro] [nvarchar](1) NULL,
    [qDidPaidWork] [int] NULL,
    [qAwayFromWork] [int] NULL,
    [qFamilyBusWork] [int] NULL,
    [bDEM_WOR_qPaidWorkIntro] [nvarchar](1) NULL,
    [bDEM_WOR_qJobsNum] [int] NULL,
    [bDEM_WOR_qJobsNumNR] [int] NULL,
    [bDEM_WOR_tabDEM_T2_fTotMins] [int] NULL,
    [bDEM_WOR_q2JobsNoHrsIntro] [nvarchar](1) NULL,
    [bDEM_WOR_q2Jobs2HrsIntro] [nvarchar](1) NULL,
    [bDEM_WOR_q2Jobs1HrsIntro] [nvarchar](1) NULL,
    [bDEM_WOR_qOccupation] [nvarchar](200) NULL,
    [bDEM_WOR_qOccupationNR] [int] NULL,
    [bDEM_WOR_qMainTasks] [nvarchar](200) NULL,
    [bDEM_WOR_qMainTasksNR] [int] NULL,
    [bDEM_WOR_qFeelAboutJob] [int] NULL,
    [bDEM_WOR_qEmployArrangement] [int] NULL,
    [bDEM_WOR_qPermEmployee] [int] NULL,
    [qHasJobToStart] [int] NULL,
    [qLookedForWork] [int] NULL,
    [qJobSearchA] [int] NULL,
    [qJobSearchB] [int] NULL,
    [qJobSearchC] [int] NULL,
    [qJobSearchD] [int] NULL,
    [qJobSearchE] [int] NULL,
    [qJobSearchF] [int] NULL,
    [qJobSearchG] [int] NULL,
    [qJobSearchH] [int] NULL,
    [qJobSearchI] [int] NULL,
    [qJobSearchOth] [nvarchar](200) NULL,
        [qJobSearchOthNR] [int] NULL,
    [qCouldStartLastWk] [int] NULL,
    [qIncTotalAmt] [int] NULL,
    [fCountryName] [nvarchar](60) NULL
     ) ON [PRIMARY]

    GO

CREATE TABLE [dbo].[G_bLWW](
    [blaiseKey_code] [nvarchar](255) NULL,
    [qThingsWorthwhileScale] [int] NULL
 ) ON [PRIMARY]

回答1:

This script generate dynamic SQL for any table with similar PK name.

Query:

SET NOCOUNT ON

IF OBJECT_ID (N'dbo.A') IS NOT NULL
   DROP TABLE dbo.A

IF OBJECT_ID (N'dbo.B') IS NOT NULL
   DROP TABLE dbo.B

IF OBJECT_ID (N'dbo.C') IS NOT NULL
   DROP TABLE dbo.C

CREATE TABLE dbo.A (PK_field INT PRIMARY KEY, column1 INT, column2 INT)
CREATE TABLE dbo.B (PK_field INT PRIMARY KEY, column3 INT, column4 INT)
CREATE TABLE dbo.C (PK_field INT PRIMARY KEY, column5 INT, [column 6] INT)

INSERT INTO dbo.A (PK_field, column1, column2)
VALUES (1, 1, 2), (2, 1, 2) 

INSERT INTO dbo.B (PK_field, column3, column4)
VALUES (2, 3, 4) 

INSERT INTO dbo.C (PK_field, column5, [column 6])
VALUES (1, 5, 6), (3, 5, 6) 

DECLARE @SQL NVARCHAR(MAX)

;WITH cte AS 
(
    SELECT 
          column_name = '[' + c.name + ']'
        , table_name = '[' + s.name + '].[' + o.name + ']'
    FROM sys.columns c WITH (NOLOCK)
    JOIN sys.objects o WITH (NOLOCK) ON c.[object_id] = o.[object_id]
    JOIN sys.schemas s WITH (NOLOCK) ON o.[schema_id] = s.[schema_id]
    WHERE o.name IN ('A', 'B', 'C')
        AND s.name = 'dbo'
        AND o.[type] = 'U'  
), unicol AS (
    SELECT TOP 1 column_name 
    FROM cte 
    GROUP BY cte.column_name
    HAVING COUNT(cte.column_name) > 1
), cols AS 
(
    SELECT DISTINCT column_name 
    FROM cte    
), tbl AS 
(
    SELECT DISTINCT table_name
    FROM cte
), rs AS 
(
    SELECT 
          tbl.table_name
        , column_name = ISNULL(cte.column_name, cols.column_name + ' = NULL')
    FROM cols
    CROSS JOIN tbl
    LEFT JOIN cte ON cols.column_name = cte.column_name AND cte.table_name = tbl.table_name
), rs2 AS (
    SELECT uni = ' UNION ALL' + CHAR(13) + 'SELECT ' + STUFF((
        SELECT ', ' + rs.column_name
        FROM rs
        WHERE tbl.table_name = rs.table_name
        GROUP BY rs.column_name
        ORDER BY rs.column_name
        FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 2, '') + 
        ' FROM ' + table_name
    FROM tbl
) 
SELECT @SQL = 'SELECT 
' + STUFF((
    SELECT CHAR(13) + ', ' + ISNULL(unicol.column_name, cols.column_name + ' = MAX(' + cols.column_name + ')')
    FROM cols
    LEFT JOIN unicol ON cols.column_name = unicol.column_name
    FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 2, ' ')
 + ' 
FROM 
(' + STUFF((
    SELECT CHAR(10) + uni
    FROM rs2
    FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 11, '') + CHAR(13) + 
    ') t 
GROUP BY ' + (SELECT column_name FROM unicol)

PRINT @SQL

EXECUTE sys.sp_executesql @SQL

Output:

SELECT 
      [column 6] = MAX([column 6])
    , [column1] = MAX([column1])
    , [column2] = MAX([column2])
    , [column3] = MAX([column3])
    , [column4] = MAX([column4])
    , [column5] = MAX([column5])
    , [PK_field] 
FROM (
    SELECT [column 6] = NULL, [column1], [column2], [column3] = NULL, [column4] = NULL, [column5] = NULL, [PK_field] FROM [dbo].[A]
     UNION ALL
    SELECT [column 6] = NULL, [column1] = NULL, [column2] = NULL, [column3], [column4], [column5] = NULL, [PK_field] FROM [dbo].[B]
     UNION ALL
    SELECT [column 6], [column1] = NULL, [column2] = NULL, [column3] = NULL, [column4] = NULL, [column5], [PK_field] FROM [dbo].[C]
) t 
GROUP BY [PK_field]

Results:

column 6    column1     column2     column3     column4     column5     PK_field
----------- ----------- ----------- ----------- ----------- ----------- -----------
6           1           2           NULL        NULL        5           1
NULL        1           2           3           4           NULL        2
6           NULL        NULL        NULL        NULL        5           3

Update in script:

DECLARE @SQL NVARCHAR(2000) -> NVARCHAR(MAX)

Output for your DDL:

SELECT 
  [blaiseKey_code]
, [bDEM_BOP_q1stParentBornNZ] = MAX([bDEM_BOP_q1stParentBornNZ])
, [bDEM_BOP_q2ndParentBornNZ] = MAX([bDEM_BOP_q2ndParentBornNZ])
, [bDEM_BOP_qHowManyParentBornNZ] = MAX([bDEM_BOP_qHowManyParentBornNZ])
, [bDEM_BOP_qHowManyRaised] = MAX([bDEM_BOP_qHowManyRaised])
, [bDEM_WOR_q2Jobs1HrsIntro] = MAX([bDEM_WOR_q2Jobs1HrsIntro])
, [bDEM_WOR_q2Jobs2HrsIntro] = MAX([bDEM_WOR_q2Jobs2HrsIntro])
, [bDEM_WOR_q2JobsNoHrsIntro] = MAX([bDEM_WOR_q2JobsNoHrsIntro])
, [bDEM_WOR_qEmployArrangement] = MAX([bDEM_WOR_qEmployArrangement])
, [bDEM_WOR_qFeelAboutJob] = MAX([bDEM_WOR_qFeelAboutJob])
, [bDEM_WOR_qJobsNum] = MAX([bDEM_WOR_qJobsNum])
, [bDEM_WOR_qJobsNumNR] = MAX([bDEM_WOR_qJobsNumNR])
, [bDEM_WOR_qMainTasks] = MAX([bDEM_WOR_qMainTasks])
, [bDEM_WOR_qMainTasksNR] = MAX([bDEM_WOR_qMainTasksNR])
, [bDEM_WOR_qOccupation] = MAX([bDEM_WOR_qOccupation])
, [bDEM_WOR_qOccupationNR] = MAX([bDEM_WOR_qOccupationNR])
, [bDEM_WOR_qPaidWorkIntro] = MAX([bDEM_WOR_qPaidWorkIntro])
, [bDEM_WOR_qPermEmployee] = MAX([bDEM_WOR_qPermEmployee])
, [bDEM_WOR_tabDEM_T2_fTotMins] = MAX([bDEM_WOR_tabDEM_T2_fTotMins])
, [fCountryName] = MAX([fCountryName])
, [q3MthsStudy] = MAX([q3MthsStudy])
, [qAge] = MAX([qAge])
, [qAge15OrOver] = MAX([qAge15OrOver])
, [qAgeNR] = MAX([qAgeNR])
, [qAgeRange] = MAX([qAgeRange])
, [qArriveNZMth] = MAX([qArriveNZMth])
, [qArriveNZYr] = MAX([qArriveNZYr])
, [qArriveNZYrNR] = MAX([qArriveNZYrNR])
, [qAwayFromWork] = MAX([qAwayFromWork])
, [qBornInNZ] = MAX([qBornInNZ])
, [qCouldStartLastWk] = MAX([qCouldStartLastWk])
, [qCountryOfBirth] = MAX([qCountryOfBirth])
, [qDidPaidWork] = MAX([qDidPaidWork])
, [qDOB] = MAX([qDOB])
, [qDOBNR] = MAX([qDOBNR])
, [qFamilyBusWork] = MAX([qFamilyBusWork])
, [qHasJobToStart] = MAX([qHasJobToStart])
, [qHighestQual] = MAX([qHighestQual])
, [qHighestQualOth] = MAX([qHighestQualOth])
, [qHighestQualOthNR] = MAX([qHighestQualOthNR])
, [qHighestQualYr] = MAX([qHighestQualYr])
, [qHighestQualYrNR] = MAX([qHighestQualYrNR])
, [qIncTotalAmt] = MAX([qIncTotalAmt])
, [qJobSearchA] = MAX([qJobSearchA])
, [qJobSearchB] = MAX([qJobSearchB])
, [qJobSearchC] = MAX([qJobSearchC])
, [qJobSearchD] = MAX([qJobSearchD])
, [qJobSearchE] = MAX([qJobSearchE])
, [qJobSearchF] = MAX([qJobSearchF])
, [qJobSearchG] = MAX([qJobSearchG])
, [qJobSearchH] = MAX([qJobSearchH])
, [qJobSearchI] = MAX([qJobSearchI])
, [qJobSearchOth] = MAX([qJobSearchOth])
, [qJobSearchOthNR] = MAX([qJobSearchOthNR])
, [qLookedForWork] = MAX([qLookedForWork])
, [qMaoriDescent] = MAX([qMaoriDescent])
, [qNotEligible] = MAX([qNotEligible])
, [qPostSchQual] = MAX([qPostSchQual])
, [qSchQual] = MAX([qSchQual])
, [qSchQualOth] = MAX([qSchQualOth])
, [qSchQualOthNR] = MAX([qSchQualOthNR])
, [qSchQualYr] = MAX([qSchQualYr])
, [qSchQualYrNR] = MAX([qSchQualYrNR])
, [qSex] = MAX([qSex])
, [qThingsWorthwhileScale] = MAX([qThingsWorthwhileScale])
, [qWorkIntro] = MAX([qWorkIntro]) 
FROM 
(
SELECT [bDEM_BOP_q1stParentBornNZ], [bDEM_BOP_q2ndParentBornNZ], [bDEM_BOP_qHowManyParentBornNZ], [bDEM_BOP_qHowManyRaised], [bDEM_WOR_q2Jobs1HrsIntro], [bDEM_WOR_q2Jobs2HrsIntro], [bDEM_WOR_q2JobsNoHrsIntro], [bDEM_WOR_qEmployArrangement], [bDEM_WOR_qFeelAboutJob], [bDEM_WOR_qJobsNum], [bDEM_WOR_qJobsNumNR], [bDEM_WOR_qMainTasks], [bDEM_WOR_qMainTasksNR], [bDEM_WOR_qOccupation], [bDEM_WOR_qOccupationNR], [bDEM_WOR_qPaidWorkIntro], [bDEM_WOR_qPermEmployee], [bDEM_WOR_tabDEM_T2_fTotMins], [blaiseKey_code], [fCountryName], [q3MthsStudy], [qAge], [qAge15OrOver], [qAgeNR], [qAgeRange], [qArriveNZMth], [qArriveNZYr], [qArriveNZYrNR], [qAwayFromWork], [qBornInNZ], [qCouldStartLastWk], [qCountryOfBirth], [qDidPaidWork], [qDOB], [qDOBNR], [qFamilyBusWork], [qHasJobToStart], [qHighestQual], [qHighestQualOth], [qHighestQualOthNR], [qHighestQualYr], [qHighestQualYrNR], [qIncTotalAmt], [qJobSearchA], [qJobSearchB], [qJobSearchC], [qJobSearchD], [qJobSearchE], [qJobSearchF], [qJobSearchG], [qJobSearchH], [qJobSearchI], [qJobSearchOth], [qJobSearchOthNR], [qLookedForWork], [qMaoriDescent], [qNotEligible], [qPostSchQual], [qSchQual], [qSchQualOth], [qSchQualOthNR], [qSchQualYr], [qSchQualYrNR], [qSex], [qThingsWorthwhileScale] = NULL, [qWorkIntro] FROM [dbo].[G_bDEM]
 UNION ALL
SELECT [bDEM_BOP_q1stParentBornNZ] = NULL, [bDEM_BOP_q2ndParentBornNZ] = NULL, [bDEM_BOP_qHowManyParentBornNZ] = NULL, [bDEM_BOP_qHowManyRaised] = NULL, [bDEM_WOR_q2Jobs1HrsIntro] = NULL, [bDEM_WOR_q2Jobs2HrsIntro] = NULL, [bDEM_WOR_q2JobsNoHrsIntro] = NULL, [bDEM_WOR_qEmployArrangement] = NULL, [bDEM_WOR_qFeelAboutJob] = NULL, [bDEM_WOR_qJobsNum] = NULL, [bDEM_WOR_qJobsNumNR] = NULL, [bDEM_WOR_qMainTasks] = NULL, [bDEM_WOR_qMainTasksNR] = NULL, [bDEM_WOR_qOccupation] = NULL, [bDEM_WOR_qOccupationNR] = NULL, [bDEM_WOR_qPaidWorkIntro] = NULL, [bDEM_WOR_qPermEmployee] = NULL, [bDEM_WOR_tabDEM_T2_fTotMins] = NULL, [blaiseKey_code], [fCountryName] = NULL, [q3MthsStudy] = NULL, [qAge] = NULL, [qAge15OrOver] = NULL, [qAgeNR] = NULL, [qAgeRange] = NULL, [qArriveNZMth] = NULL, [qArriveNZYr] = NULL, [qArriveNZYrNR] = NULL, [qAwayFromWork] = NULL, [qBornInNZ] = NULL, [qCouldStartLastWk] = NULL, [qCountryOfBirth] = NULL, [qDidPaidWork] = NULL, [qDOB] = NULL, [qDOBNR] = NULL, [qFamilyBusWork] = NULL, [qHasJobToStart] = NULL, [qHighestQual] = NULL, [qHighestQualOth] = NULL, [qHighestQualOthNR] = NULL, [qHighestQualYr] = NULL, [qHighestQualYrNR] = NULL, [qIncTotalAmt] = NULL, [qJobSearchA] = NULL, [qJobSearchB] = NULL, [qJobSearchC] = NULL, [qJobSearchD] = NULL, [qJobSearchE] = NULL, [qJobSearchF] = NULL, [qJobSearchG] = NULL, [qJobSearchH] = NULL, [qJobSearchI] = NULL, [qJobSearchOth] = NULL, [qJobSearchOthNR] = NULL, [qLookedForWork] = NULL, [qMaoriDescent] = NULL, [qNotEligible] = NULL, [qPostSchQual] = NULL, [qSchQual] = NULL, [qSchQualOth] = NULL, [qSchQualOthNR] = NULL, [qSchQualYr] = NULL, [qSchQualYrNR] = NULL, [qSex] = NULL, [qThingsWorthwhileScale], [qWorkIntro] = NULL FROM [dbo].[G_bLWW]
) t 
GROUP BY [blaiseKey_code]


回答2:

Try this :

DECLARE
    @cols VARCHAR(MAX)
  , @TableA VARCHAR(10)= 'TableA'
  , @TableB VARCHAR(10)= 'TableB'
  , @TableC VARCHAR(10)= 'TableC'
  , @Pk VARCHAR(20)

SELECT
    @cols = STUFF((
            SELECT DISTINCT ', [' + c.column_name + ']'
            FROM INFORMATION_SCHEMA.Columns c
            WHERE c.table_name IN ( @TableA,@TableB,@TableC )
            FOR XML PATH('')
          ), 1, 2, '');

SELECT @Pk = column_name
FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
WHERE OBJECTPROPERTY(OBJECT_ID(constraint_name), 'IsPrimaryKey') = 1
    AND table_name = @TableA

DECLARE @query VARCHAR(1000)
SET @query = 'SELECT ' + @cols + ' FROM ' + @TableA + ' JOIN ' + @TableB
    + ' ON ' + @TableA + '.' + @Pk + '=' + @TableB + '.' + @Pk
    + ' JOIN ' + @TableC + ' ON ' + @TableB + '.' + @Pk + '=' + @TableC
    + '.' + @Pk 

EXEC (@query)

Do not forget the warning about special html characters in column name given by @Gordon.



回答3:

You can only do this as a stored procedure. A SQL query returns a specified set of columns, no more, no less. The only way to get a variable number of columns is using dynamic SQL. And, functions don't support dynamic SQL.

You would need to construct a SQL statement, concatenating the column names from INFORMATION_SCHEMA.Columns. Something like this:

declare @cols varchar(max);

select @cols = stuff((select distinct ', ['+c.column_name+']'
                      from INFORMATION_SCHEMA.Columns c
                      where c.table_name in (<list of tables here>)
                      for xml path ('')
                     ), 1, 2, '');

This will not work for column names that have special html characters, such as '<', '>', or '&'.

You can then construct the full query statement and execute it, with exec() or sp_executesql().

An alternative approach would be to create a view that has all the joins and all the columns. Let the SQL optimizer determine the best execution path.