ms sql split by & based on data as a column name

2019-08-31 05:40发布

问题:

I have 4 rows in 'dataset' table in 'result' field and it is seperated by ampersand (&). I want to divide 'result' field into multiple fields based on field name. How do I do this in SSIS or SQL? Fieldname are higligtened. It would be nice it can be doable without .net script. If you notice, field names are jumbled.

Result (fieldname)

deptid=1 & firstname=din & lastname=kal & middlename=kum

firstname=raj & lastname=puli & middlename=kumar & deptid=2

firstname=pavan & lastname=gud & middlename=kumarrao

deptid=7 & firstname=sha & lastname=hank

Output

**deptid**  **firstname**   **lastname**    **middlename**

1               din             kal              kum

2               raj             puli             kumar

                pavan           gud              kumarrao

7               sha             hank     

回答1:

CROSS APPLY the table with some form of split string function. Use these results to create a Dynamic SQL command and execute. Persist these results to a new table and DROP the old one. Storing data like that is not okay.

Random SplitString:

IF NOT EXISTS ( SELECT  1
                FROM    sys.objects
                WHERE   name = 'SplitString'
                    AND type = 'TF' )
BEGIN
    --DROP FUNCTION dbo.SplitString;
    EXEC( 'CREATE FUNCTION dbo.SplitString() RETURNS @t TABLE ( x BIT ) AS BEGIN RETURN; END' );
END;
GO

ALTER FUNCTION dbo.SplitString 
(
    @String             VARCHAR( MAX ),
    @Delimiter          VARCHAR( 16 )
) RETURNS @Values TABLE
(
    SplitString         VARCHAR( MAX )          
) AS BEGIN
    /*
        dbo.SplitString

        @Param:
        String, the string to be split,
        Delimiter, the delimiter to use.

        @Returns:
        Table, each tuple consisting of a tokenized value of the original string, 
            split by the provided delimiter.  Empty expressions result in NULL values.

        @Example:
        SELECT  *
        FROM    dbo.SplitString( 'This is a test', ' ' );

        SELECT  *
        FROM    dbo.SplitString( NULL, ' ' );       
     */
    DECLARE @i          INTEGER,
            @Split      VARCHAR( MAX ),
            @RowCount   INTEGER;
        SET @RowCount = 0;

        SET @i = -1;        
    WHILE ( LEN( ISNULL( @String, '' ) ) > 0 )
    BEGIN
        SET @String = LTRIM( RTRIM( @String ) );

        SET @i = CHARINDEX( @Delimiter, @String );
        IF ( @i = 0 )
        BEGIN
            INSERT INTO @Values ( SplitString )
            VALUES ( LTRIM( RTRIM( @String ) ) );

            SET @String = '';
        END ELSE BEGIN
            INSERT INTO @Values ( SplitString )
            VALUES ( LTRIM( RTRIM( LEFT( @String, @i - LEN( @Delimiter ) ) ) ) );

            SET @String = RIGHT( @String, LEN( @String ) - @i );
        END;

        SET @RowCount = @RowCount + 1;
    END;  

    IF ( @RowCount = 0 )
    BEGIN
        INSERT INTO @Values ( SplitString )
        VALUES ( NULL );
    END;  

    RETURN;
END;
GO

A Test Case:

IF NOT EXISTS ( SELECT  1
                FROM    sys.objects
                WHERE   name = 'DropThisNotAtomicNonsense'
                    AND type = 'U' )
BEGIN
    --DROP TABLE dbo.DropThisNotAtomicNonsense;
    CREATE TABLE dbo.DropThisNotAtomicNonsense
    (
        DropThisNotAtomicNonsense_PK    TINYINT IDENTITY( 1, 1 ) NOT NULL,
                                            PRIMARY KEY ( DropThisNotAtomicNonsense_PK ),
        Result          VARCHAR( MAX )
    );

    INSERT INTO dbo.DropThisNotAtomicNonsense ( Result )
    VALUES  ( 'deptid=1 & firstname=din & lastname=kal & middlename=kum' ),
            ( 'firstname=raj & lastname=puli & middlename=kumar & deptid=2' ),
            ( 'firstname=pavan & lastname=gud & middlename=kumarrao' ),
            ( 'deptid=7 & firstname=sha & lastname=hank' );
END;
GO

Problems Solved:

SET NOCOUNT ON;

DECLARE @Cols       VARCHAR( MAX ),
        @SQL        NVARCHAR( MAX );

CREATE TABLE #t_KeyValue
(
    OriginalEntity  TINYINT,
    FieldName       VARCHAR( MAX ),
    FieldValue      VARCHAR( MAX )
);

INSERT INTO #t_KeyValue ( OriginalEntity, FieldName, FieldValue )
SELECT  OriginalEntity = ugh.DropThisNotAtomicNonsense_PK,
        FieldName = LEFT( x.SplitString, CHARINDEX( '=', x.SplitString ) - 1 ),
        FieldValue = RIGHT( x.SplitString, CHARINDEX( '=', REVERSE( x.SplitString ) ) - 1 )
FROM    dbo.DropThisNotAtomicNonsense ugh
CROSS APPLY dbo.SplitString( ugh.Result, '&' ) x;

CREATE TABLE #t_ColumnList
(
    FieldName               VARCHAR( MAX ),
    Processed               TINYINT NOT NULL DEFAULT 0
);

INSERT INTO #t_ColumnList ( FieldName )
SELECT  DISTINCT FieldName
FROM    #t_KeyValue;

IF NOT EXISTS ( SELECT  1
                FROM    sys.objects
                WHERE   name = 'NormalTable'
                    AND type = 'U' )
BEGIN
    --DROP TABLE dbo.NormalTable;
    CREATE TABLE dbo.NormalTable
    (
        NormalTable_PK      TINYINT IDENTITY( 1, 1 ) NOT NULL,
                                PRIMARY KEY ( NormalTable_PK ),
        OriginalEntity      TINYINT
    )

    WHILE EXISTS (  SELECT  1
                FROM    #t_ColumnList
                WHERE   Processed = 0 )
    BEGIN
        SELECT  @SQL = FieldName
        FROM    #t_ColumnList
        WHERE   Processed = 0;

        UPDATE  #t_ColumnList
            SET Processed = 1
        WHERE   FieldName = @SQL;

        SET @SQL = N'
            ALTER TABLE dbo.NormalTable
            ADD [' + REPLACE( @SQL, '''', '' ) + ']     VARCHAR( 128 );'
        EXECUTE dbo.sp_executesql @command = @SQL;
    END;

    SELECT  @Cols = ISNULL( @Cols, '' ) + '[' + FieldName + '], '
    FROM (  SELECT  DISTINCT FieldName
            FROM    #t_KeyValue ) l
        SET @Cols = LEFT( @Cols, LEN( @Cols ) - 1 );

    SET @SQL = N'
        INSERT INTO dbo.NormalTable ( OriginalEntity, ' + @Cols + ' )
        SELECT  OriginalEntity, ' + @Cols + '
        FROM (  SELECT  OriginalEntity, FieldName, FieldValue
                FROM    #t_KeyValue ) s
        PIVOT ( MAX( FieldValue ) FOR
                FieldName IN ( ' + @Cols + ' ) ) p;';
    RAISERROR ( @SQL, 0, 1 ) WITH NOWAIT;
    EXECUTE sp_executesql @command = @SQL;
END;
GO

DROP TABLE #t_ColumnList;
DROP TABLE #t_KeyValue;
GO

SELECT  *
FROM    dbo.NormalTable;
GO

DROP TABLE dbo.DropThisNotAtomicNonsense;
GO

SET NOCOUNT OFF;

Now, with your nice, new table, you can perfom ALTER ... COLUMN statements to switch to some proper data types on those columns.



回答2:

with x as (
    select cast('<e col="' + replace(replace(result, '=', '">'), '&', '</e><e col="') + '</e>' as xml) as x
      from dataset
)
select x.value('(/e[contains(@col, "deptid")])[1]', 'int') as deptid
      ,x.value('(/e[contains(@col, "firstname")])[1]', 'nvarchar(255)') as firstname
      ,x.value('(/e[contains(@col, "lastname")])[1]', 'nvarchar(255)') as lastname
      ,x.value('(/e[contains(@col, "middlename")])[1]', 'nvarchar(255)') as middlename
  from x

should deliver what you want. It is using an intermediate XML object and xml processing to achieve the result. The intermediate column looks like

<e col="deptid">1 </e><e col=" firstname">din </e><e col=" lastname">kal </e><e col=" middlename">kum</e>

This solution is not absolutely fool proof, but if you are sure the format is stable and does not contain completely odd formated entries, it should work. Note: I need contains, as SQL Server does not seem to have a trim function in XPath. This will fail in case a & or = are contained in the data for other purposes than separating field name from content or separating fields, i. e. if these characters would be contained in the names.