XML parsing semicolon expected - only when replaci

2019-09-06 21:48发布

问题:

I have the following query, which works:

DECLARE @Combined VARCHAR(MAX)

SET @Combined = 'mac cheese';

DECLARE @KeyTable TABLE (Keyword VARCHAR(MAX))
INSERT INTO @KeyTable
SELECT @Combined

IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
    DROP TABLE #Temp

SELECT LTRIM(RTRIM(m.n.value('.[1]','varchar(8000)'))) AS Keyword
    INTO #Temp
FROM
(
SELECT CAST('<XMLRoot><RowData>' + REPLACE(Keyword,',','</RowData><RowData>') + '</RowData></XMLRoot>' AS XML) AS x
FROM @KeyTable
)t
CROSS APPLY x.nodes('/XMLRoot/RowData')m(n);

SELECT * FROM #Temp

When I try to add an ampersand to the @Combined it gives a special character error:

DECLARE @Combined VARCHAR(MAX)

SET @Combined = 'mac & cheese';

DECLARE @KeyTable TABLE (Keyword VARCHAR(MAX))
INSERT INTO @KeyTable
SELECT @Combined

IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
    DROP TABLE #Temp

SELECT LTRIM(RTRIM(m.n.value('.[1]','varchar(8000)'))) AS Keyword
    INTO #Temp
FROM
(
SELECT CAST('<XMLRoot><RowData>' + REPLACE(Keyword,',','</RowData><RowData>') + '</RowData></XMLRoot>' AS XML) AS x
FROM @KeyTable
)t
CROSS APPLY x.nodes('/XMLRoot/RowData')m(n);

SELECT * FROM #Temp

So I added some code to replace the ampersand with &amp and I get a semicolon error:

DECLARE @Combined VARCHAR(MAX)

SET @Combined = 'mac & cheese'

SET @Combined = REPLACE(@Combined, '&', '&amp')

DECLARE @KeyTable TABLE (Keyword VARCHAR(MAX))

INSERT INTO @KeyTable
SELECT @Combined;

IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
    DROP TABLE #Temp

SELECT LTRIM(RTRIM(m.n.value('.[1]','varchar(8000)'))) AS Keyword
    INTO #Temp
FROM
(
SELECT CAST('<XMLRoot><RowData>' + REPLACE(Keyword,',','</RowData><RowData>') + '</RowData></XMLRoot>' AS XML) AS x
FROM @KeyTable
)t
CROSS APPLY x.nodes('/XMLRoot/RowData')m(n);

SELECT * FROM #Temp

I've attempted moving the semicolon to various locations and even adding more but it just won't work. Any help anyone can provide would be appreciated!

回答1:

No, you should not start to replace special characters on your own!

Next time your string contains a < or a -> and will break again.

Let FOR XML PATH() do the hard work for you:

SELECT 'This is pure text: ' + 'mac & cheese,<test1>,"test2"';
SELECT 'This is encoded: ' + (SELECT 'mac & cheese,<test1>,"test2"' AS [*] FOR XML PATH(''))

The result of the second: This is encoded: mac &amp; cheese,&lt;test1&gt;,"test2"

UPDATE for your code:

The only thing you have to change, is to use (SELECT ... FOR XML PATH()) instead of the naked Keyword

DECLARE @KeyTable TABLE (Keyword VARCHAR(MAX))
INSERT INTO @KeyTable VALUES('mac & cheese,<test1>,"test2"');

SELECT LTRIM(RTRIM(m.n.value('.[1]','varchar(8000)'))) AS Keyword
FROM
(
SELECT CAST('<XMLRoot><RowData>' + REPLACE((SELECT Keyword AS [*] FOR XML PATH('')),',','</RowData><RowData>') + '</RowData></XMLRoot>' AS XML) AS x
FROM @KeyTable
)t
CROSS APPLY x.nodes('/XMLRoot/RowData')m(n);

This will replace all forbidden characters implicitly...

The result

mac & cheese
<test1>
"test2"