Efficiently convert rows to columns in SQL Server

2020-02-16 04:01发布

I have 3 tables: testpackage, testpackageReport, testpackagereportdetail with this structure:

enter image description here

With this query

SELECT        
    dbo.TestPackages.PackageNumber, dbo.TestPackages.Size, 
    dbo.TestPackages.Code, dbo.TestPackageReports.ReportNumber,   
    dbo.TestPackageReportDetails.Step, dbo.TestPackageReportDetails.Status, 
    dbo.TestPackageReports.SubmitDateTime, 
    dbo.TestPackageReportDetails.Id AS ReportDetailId
FROM
    dbo.TestPackages 
INNER JOIN
    dbo.TestPackageReportDetails ON dbo.TestPackages.Id = dbo.TestPackageReportDetails.TestPackageId 
INNER JOIN
    dbo.TestPackageReports ON dbo.TestPackageReportDetails.TestPackageReportId = dbo.TestPackageReports.Id

The result is this:

PackageNumber   Size    Code    ReportNumber    Step        Status  SubmitDateTime  ReportDetailId
1000220-G-02-TR 1.31    143     LC-0431         LineCheck   Reject  2010-12-12     218
1000220-G-02-TR 1.31    143     LC-0131         LineCheck   Accept  2011-12-12     220
1000220-G-02-TR 1.31    143     PT-0248         Test        Accept  2012-12-12     513
1000220-G-02-TR 1.31    143     DR-0202         Drying      Accept  2013-12-12     625
1000220-G-02-TR 1.31    143     AFP-AG-FL-0030  Flushing    Accept  2015-12-12     745

But I need to show this data just in one row like this:

 PackageNumber   Size   Code    LineCheckReportNumber   LineCheckStep       LineCheckStatus linecheckSubmitDateTime ReportDetailId      tesReportNumber testCheckStep       testStatus  testSubmitDateTime  ReportDetailId

1000220-G-02-TR 1.31   143             LC-0431          LineCheck                Accept       2011-12-12              220                      PT-0248        Test               Accept       2012-12-12              513

For noisy data in the expected result I remove drying and flushing columns. As you can see I need all this records to be shown as a one row, another this that is important is the data with maximum ReportDetailId that is accepted because every testpackage can have multi linecheck or test or flsuhing or drying reports

Sample data:

TestpackageTable

enter image description here

TestpackageReport

enter image description here

Testpackagereportdetail

enter image description here

I write the query using entity framework as you can see but it is very slow :

from i in _ctx.TestPackages
join testpackreportdet in _ctx.TestPackageReportDetails on i.Id equals
    testpackreportdet.TestPackageId
join testPackageRepo in _ctx.TestPackageReports on testpackreportdet.TestPackageReportId equals testPackageRepo.Id into g1
from y1 in g1.DefaultIfEmpty()
group new { y1, testpackreportdet } by new { i }
into grouping
let MaxLinecheck = grouping.Select(item => item.testpackreportdet)
    .Where(item => item != null && item.Step == "LineCheck")
    .OrderByDescending(item => item.Id)

let MaxClean = grouping.Select(item => item.testpackreportdet)
    .Where(item => item != null && item.Step == "Clean")
    .OrderByDescending(item => item.Id)

let MaxTest = grouping.Select(item => item.testpackreportdet)
    .Where(item => item != null && item.Step == "Test")
    .OrderByDescending(item => item.Id)

let MaxFlush = grouping.Select(item => item.testpackreportdet)
    .Where(item => item != null && item.Step == "Flushing")
    .OrderByDescending(item => item.Id)

let MaxDrying = grouping.Select(item => item.testpackreportdet)
    .Where(item => item != null && item.Step == "Drying")
    .OrderByDescending(item => item.Id)

let MaxReins = grouping.Select(item => item.testpackreportdet)
    .Where(item => item != null && item.Step == "Reinstatment")
    .OrderByDescending(item => item.Id)

let MaxMono = grouping.Select(item => item.testpackreportdet)
 .Where(item => item != null && item.Step == "Mono")
 .OrderByDescending(item => item.Id)

let MaxPAD = grouping.Select(item => item.testpackreportdet)
 .Where(item => item != null && item.Step == "PADTest")
 .OrderByDescending(item => item.Id)

let MaxVariation = grouping.Select(item => item.testpackreportdet)
 .Where(item => item != null && item.Step == "Variation")
 .OrderByDescending(item => item.Id)
    select new ViewDomainClass.TechnicalOffice.ViewTestPackageState()
    {
        Id = grouping.Key.i.Id,
        PackageNumber = grouping.Key.i.PackageNumber,
        Size = grouping.Key.i.Size.ToString(),
        Code = grouping.Key.i.Code,
        TestPackageOrder = grouping.Key.i.TestPackageOrder,
        LineCheckState = MaxLinecheck.FirstOrDefault().Status,
        LineCheckSubmitDateTime =
            grouping.Where(
                i => i.y1.Id == MaxLinecheck.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.SubmitDateTime.ToString(),
        LineCheckReportNumber =
            grouping.Where(
                i => i.y1.Id == MaxLinecheck.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.ReportNumber,
        CleaningState = MaxClean.FirstOrDefault().Status,
        CleanSubmitDateTime =
            grouping.Where(i => i.y1.Id == MaxClean.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.SubmitDateTime.ToString(),
        CleanReportNumber =
            grouping.Where(i => i.y1.Id == MaxClean.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.ReportNumber,
        TestState = MaxTest.FirstOrDefault().Status,
        TestSubmitDateTime =
            grouping.Where(i => i.y1.Id == MaxTest.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.SubmitDateTime.ToString(),
        TestReportNumber =
            grouping.Where(i => i.y1.Id == MaxTest.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.ReportNumber,

        Drying = MaxDrying.FirstOrDefault().Status,
        DryingSubmitDateTime =
            grouping.Where(i => i.y1.Id == MaxDrying.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.SubmitDateTime.ToString(),
        DryingReportNumber =
            grouping.Where(i => i.y1.Id == MaxDrying.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.ReportNumber,
        Flushing = MaxFlush.FirstOrDefault().Status,
        FlushingSubmitDateTime =
            grouping.Where(i => i.y1.Id == MaxFlush.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.SubmitDateTime.ToString(),
        FlushingReportNumber =
            grouping.Where(i => i.y1.Id == MaxFlush.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.ReportNumber,
        ReInstatement = MaxReins.FirstOrDefault().Status,
        ReInstatementSubmitDateTime =
            grouping.Where(i => i.y1.Id == MaxReins.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.SubmitDateTime.ToString(),
        ReInstatementReportNumber =
            grouping.Where(i => i.y1.Id == MaxReins.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.ReportNumber,
        Mono = MaxMono.FirstOrDefault().Status,
        MonoSubmitDateTime =
            grouping.Where(i => i.y1.Id == MaxMono.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.SubmitDateTime.ToString(),
        MonoReportNumber =
            grouping.Where(i => i.y1.Id == MaxMono.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.ReportNumber,
        Variation = MaxVariation.FirstOrDefault().Status,
        VariationSubmitDateTime =
            grouping.Where(i => i.y1.Id == MaxVariation.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.SubmitDateTime.ToString(),
        VariationReportNumber =
            grouping.Where(i => i.y1.Id == MaxVariation.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.ReportNumber,
        PAD = MaxPAD.FirstOrDefault().Status,
        PADSubmitDateTime = 
            grouping.Where(i => i.y1.Id == MaxPAD.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.SubmitDateTime.ToString(),
        PADReportNumber = 
            grouping.Where(i => i.y1.Id == MaxPAD.FirstOrDefault().TestPackageReportId)
                .FirstOrDefault()
                .y1.ReportNumber
}).ToList();

1条回答
forever°为你锁心
2楼-- · 2020-02-16 05:01

You need pivoting and dynamic SQL. I would suggest to put output of your query into temporary table and then work with it:

USE YourDatabase --Use your database

SELECT  TP.PackageNumber,
        TP.Size, 
        TP.Code, 
        R.ReportNumber,   
        RD.Step, 
        RD.[Status], 
        R.SubmitDateTime, 
        RD.Id AS ReportDetailId
INTO #temporary --it will automatically create #temporary table with results
FROM dbo.TestPackages TP
INNER JOIN dbo.TestPackageReportDetails RD
    ON TP.Id = RD.TestPackageId 
INNER JOIN dbo.TestPackageReports R
    ON RD.TestPackageReportId = R.Id

USE tempdb --switch to tempdb

SELECT TOP 1 WITH TIES * INTO #temp
FROM #temporary
ORDER BY ROW_NUMBER() OVER (PARTITION BY PackageNumber, Size, Code, Step ORDER BY ReportDetailId DESC) 
--Here we get only rows with maximum ReportDetailID over PackageNumber, Size, Code, Step

DROP TABLE #temporary --get rid of #temporary, now we use #temp

DECLARE @pvt_columns nvarchar(max), --to store columns for pivoting
        @unpvt_columns nvarchar(max), --to store columns that will be converted into 1 datatype for unpivoting
        @columns nvarchar(max), -- columns comma separated
        @sql nvarchar(max) --store query to run

SELECT @pvt_columns = COALESCE(@pvt_columns,'') + ','+QUOTENAME(Step+name)
FROM (
    SELECT name
    FROM sys.columns 
    WHERE object_id = OBJECT_ID(N'#temp')
    AND name not in ('PackageNumber','Size','Code','Step')
) names
CROSS JOIN (
    SELECT DISTINCT Step
    FROM #temp
    ) steps


SELECT  @unpvt_columns = COALESCE(@unpvt_columns,'')+',CAST('+QUOTENAME(name)+' as nvarchar(max)) '+QUOTENAME(name),
        @columns = COALESCE(@columns,'') + ','+QUOTENAME(name)
FROM sys.columns 
WHERE object_id = OBJECT_ID(N'#temp')
AND name not in ('PackageNumber','Size','Code','Step')

SELECT @sql = N'
SELECT *
FROM (
    SELECT  PackageNumber,
            Size,
            Code,
            Step+[Columns] as [Columns],
            [Values]
    FROM (
        SELECT  PackageNumber,
                Size,
                Code,
                Step'+@unpvt_columns+'
        FROM #temp) t
    UNPIVOT (
        [Values] FOR [Columns] IN ('+STUFF(@columns,1,1,'')+')
    ) unpvt
    ) p
PIVOT (
    MAX([Values]) FOR [Columns] in ('+STUFF(@pvt_columns,1,1,'')+')
) pvt'

EXEC sp_executesql @sql

DROP TABLE #temp

Output:

PackageNumber   Size    Code    DryingReportDetailId    DryingReportNumber  DryingStatus    DryingSubmitDateTime    FlushingReportDetailId  FlushingReportNumber    FlushingStatus  FlushingSubmitDateTime  LineCheckReportDetailId LineCheckReportNumber   LineCheckStatus LineCheckSubmitDateTime TestReportDetailId  TestReportNumber    TestStatus  TestSubmitDateTime
1000220-G-02-TR 1,31    143     625                     DR-0202             Accept          2013-12-12              745                     AFP-AG-FL-0030          Accept          2015-12-12              220                     LC-0131                 Accept          2011-12-12              513                 PT-0248             Accept      2012-12-12

You can SELECT variables to see what is stored in them, and PRINT @sql to see the full text of query.

查看更多
登录 后发表回答