Character mapping / search and replace character b

2020-05-01 08:10发布

问题:

I am running on SQL Server 2008 R2 and we have a requirement here whereby I need to create replace certain English characters to locale language characters used previously in legacy system.

For this, I would probably use the replace function in T-SQL, but in my practice, we are replacing character by character for example

'ASkjDe'
A=char(XX)
S=char(XX)
k=char(XX)
j=char(XX)
D=char(XX)
e=char(XX)

Now, I want to avoid using nested replace which means I have to do 32 character check. Is there such a function which I can pass it list of values?

We already have a function in place which does the replacing but its very slow and we are hoping that replace function would do a better job. Below is the current function we are using

CREATE FUNCTION [dbo].[ArabicToString] (@inString VARCHAR(MAX))
RETURNS NVARCHAR(MAX)
AS
BEGIN
  DECLARE @Vchar char(1), @Result NVARCHAR(MAX), @Flag BIT, @Position INT, @StrLength INT, @LChar CHAR, @HChar NCHAR

  SET @Result = ''
  SET @Position = 1
  SET @StrLength = DATALENGTH(@inString)  -- string length not going to change
  SET @VChar = SUBSTRING(@inString, @Position, 1)
  if ASCII(@Vchar) > 189 and ASCII(@Vchar) < 255 
     RETURN Rtrim(Ltrim(@inString))

  WHILE (@Position <= @StrLength)  -- leave loop if bad character found
  BEGIN
      -- Reset holders
      SET @LChar = SUBSTRING(@inString, @Position, 1)
      Set @HChar = ''

      if cast(@LChar as varbinary(1))=cast('*' as varbinary(1))
            Set @HChar = '*'
      else if cast(@LChar as varbinary(1)) = cast('-' as varbinary(1))
            set @HChar = '-'
      else if cast(@LChar as varbinary(1)) = cast('A' as varbinary(1))
            set @HChar = 'ء'
      else if cast(@LChar as varbinary(1)) = cast('B' as varbinary(1))
            set @HChar='آ'
      else if cast(@LChar as varbinary(1)) = cast('C' as varbinary(1))
            set @HChar='أ'
      else if cast(@LChar as varbinary(1)) = cast('D' as varbinary(1))
            set @HChar='ؤ'
      else if cast(@LChar as varbinary(1)) = cast('E' as varbinary(1))
            set @HChar='إ'
      else if cast(@LChar as varbinary(1)) = cast('F' as varbinary(1))
            set @HChar ='ئ'
      else if cast(@LChar as varbinary(1)) = cast('G' as varbinary(1))
            set @HChar='ا'
      else if cast(@LChar as varbinary(1)) = cast('H' as varbinary(1))
            set @HChar='ب'
      else if cast(@LChar as varbinary(1)) = cast('I' as varbinary(1))
            set @HChar='ة'
      else if cast(@LChar as varbinary(1)) = cast('J' as varbinary(1))
            set @HChar='ت'
      else if cast(@LChar as varbinary(1)) = cast('K' as varbinary(1))
            set @HChar='ث'
      else if cast(@LChar as varbinary(1)) = cast('L' as varbinary(1))
            set @HChar='ج'
      else if cast(@LChar as varbinary(1)) = cast('M' as varbinary(1))
            set @HChar='ح'
      else if cast(@LChar as varbinary(1)) = cast('N' as varbinary(1))
            set @HChar='خ'
      else if cast(@LChar as varbinary(1)) = cast('O' as varbinary(1))
            set @HChar='د'
      else if cast(@LChar as varbinary(1)) = cast('P' as varbinary(1))
            set @HChar='ذ'
      else if cast(@LChar as varbinary(1)) = cast('Q' as varbinary(1))
            set @HChar='ر'
      else if cast(@LChar as varbinary(1)) = cast('R' as varbinary(1))
            set @HChar='ز'
      else if cast(@LChar as varbinary(1)) = cast('S' as varbinary(1))
            set @HChar='س'
      else if cast(@LChar as varbinary(1)) = cast('T' as varbinary(1))
            set @HChar='ش'
      else if cast(@LChar as varbinary(1)) = cast('U' as varbinary(1))
            set @HChar='ص'
      else if cast(@LChar as varbinary(1)) = cast('V' as varbinary(1))
            set @HChar='ض'
      else if cast(@LChar as varbinary(1)) = cast('W' as varbinary(1))
            set @HChar='ط'
      else if cast(@LChar as varbinary(1)) = cast('X' as varbinary(1))
            set @HChar='ظ'
      else if cast(@LChar as varbinary(1)) = cast('Y' as varbinary(1))
            set @HChar='ع'
      else if cast(@LChar as varbinary(1)) = cast('Z' as varbinary(1))
            set @HChar='غ'
      else if cast(@LChar as varbinary(1)) = cast('a' as varbinary(1))
            set @HChar='ف'
      else if cast(@LChar as varbinary(1)) = cast('b' as varbinary(1))
            set @HChar='ق'
      else if cast(@LChar as varbinary(1)) = cast('c' as varbinary(1))
            set @HChar='ك'
      else if cast(@LChar as varbinary(1)) = cast('d' as varbinary(1))
            set @HChar='ل'    
      else if cast(@LChar as varbinary(1)) = cast('e' as varbinary(1))
            set @HChar='م'
      else if cast(@LChar as varbinary(1)) = cast('f' as varbinary(1))
            set @HChar='ن'
      else if cast(@LChar as varbinary(1)) = cast('g' as varbinary(1))
            set @HChar='ه'    
      else if cast(@LChar as varbinary(1)) = cast('h' as varbinary(1))
            set @HChar='و'
      else if cast(@LChar as varbinary(1)) = cast('i' as varbinary(1))
            set @HChar='ى'
      else if cast(@LChar as varbinary(1)) = cast('j' as varbinary(1))
            set @HChar='ي'    


      else if cast(@LChar as varbinary(1)) = cast('v' as varbinary(1))
            set @HChar='ـ'


      else if cast(@LChar as varbinary(1)) = cast('1' as varbinary(1))
            set @HChar='١'
      else if cast(@LChar as varbinary(1)) = cast('2' as varbinary(1))
            set @HChar='٢'    
      else if cast(@LChar as varbinary(1)) = cast('3' as varbinary(1))
            set @HChar='٣'
      else if cast(@LChar as varbinary(1)) = cast('4' as varbinary(1))
            set @HChar='٤'
      else if cast(@LChar as varbinary(1)) = cast('5' as varbinary(1))
            set @HChar='٥'
      else if cast(@LChar as varbinary(1)) = cast('6' as varbinary(1))
            set @HChar='٦'    
      else if cast(@LChar as varbinary(1)) = cast('7' as varbinary(1))
            set @HChar='٧'
      else if cast(@LChar as varbinary(1)) = cast('8' as varbinary(1))
            set @HChar='٨'
      else if cast(@LChar as varbinary(1)) = cast('9' as varbinary(1))
            set @HChar='٩'    
      else if cast(@LChar as varbinary(1)) = cast('0' as varbinary(1))
            set @HChar='٠'

      else if @LChar='/'
            set @HChar='\'

      select @Result=@Result + @HChar

      -- Add one to position 
      SET @Position= @Position + 1
  END

  RETURN Rtrim(Ltrim(@Result))
END

If someone can suggest a different approach, something even other than the replace function or the above approach we currently using I would appreciate it.

回答1:

Does this run any faster for you? (I used my collation of SQL_Latin1_General_CP1_CS_AS, you might want to change that).

ALTER FUNCTION [dbo].[ArabicToString] (@inString VARCHAR(MAX))
RETURNS NVARCHAR(MAX)
AS
BEGIN
    DECLARE @MappingCharacters TABLE
    (
        InputCharacter NCHAR(1) COLLATE SQL_Latin1_General_CP1_CS_AS PRIMARY KEY,
        OutputChar NCHAR(1)
    )

    INSERT @MappingCharacters
    VALUES
        ('A', 'ء')
        ,('B', 'آ')
        ,('C', 'أ')
        ,('D', 'ؤ')
        ,('E', 'إ')
        ,('F', 'ئ')
        ,('G', 'ا')
        ,('H', 'ب')
        ,('I', 'ة')
        ,('J', 'ت')
        ,('K', 'ث')
        ,('L', 'ج')
        ,('M', 'ح')
        ,('N', 'خ')
        ,('O', 'د')
        ,('P', 'ذ')
        ,('Q', 'ر')
        ,('R', 'ز')
        ,('S', 'س')
        ,('T', 'ش')
        ,('U', 'ص')
        ,('V', 'ض')
        ,('W', 'ط')
        ,('X', 'ظ')
        ,('Y', 'ع')
        ,('Z', 'غ')
        ,('a', 'ف')
        ,('b', 'ق')
        ,('c', 'ك')
        ,('d', 'ل')
        ,('e', 'م')
        ,('f', 'ن')
        ,('g', 'ه')
        ,('h', 'و')
        ,('i', 'ى')
        ,('j', 'ي')
        ,('v', 'ـ')
        ,('1', '١')
        ,('2', '٢')
        ,('3', '٣')
        ,('4', '٤')
        ,('5', '٥')
        ,('6', '٦')
        ,('7', '٧')
        ,('8', '٨')
        ,('9', '٩')
        ,('0', '٠')
        ,('/', '\')

    DECLARE @Result NVARCHAR(MAX) = ''
        , @Position INT = 1
        , @StrLength INT = DATALENGTH(@inString)

    DECLARE @Vchar char(1), @NextChar NCHAR(1)

    SET @VChar = SUBSTRING(@inString, @Position, 1)
    IF ASCII(@Vchar) > 189 and ASCII(@Vchar) < 255 
    RETURN Rtrim(Ltrim(@inString))

    WHILE (@Position <= @StrLength)  -- leave loop if bad character found
    BEGIN
        -- Reset holders
        SET @NextChar = SUBSTRING(@inString, @Position, 1)

        SET @Result = @Result + ISNULL((SELECT OutputChar FROM @MappingCharacters MC WHERE InputCharacter = @NextChar COLLATE SQL_Latin1_General_CP1_CS_AS), @NextChar)

        -- Add one to position 
        SET @Position= @Position + 1
    END

  RETURN Rtrim(Ltrim(@Result))
END
GO

It is very similar, but I'm hoping that rather than a very large series of else if checks, SQL Server will perform better when working with a set. I'm not sure if this would work better if @MappingCharacters was a real table or not.