How to strip all non-alphabetic characters from st

2018-12-31 04:19发布

How could you remove all characters that are not alphabetic from a string?

What about non-alphanumeric?

Does this have to be a custom function or are there also more generalizable solutions?

19条回答
还给你的自由
2楼-- · 2018-12-31 04:44

If you are like me and don't have access to just add functions to your production data but still want to perform this kind of filtering, here's a pure SQL solution using a PIVOT table to put the filtered pieces back together again.

N.B. I hardcoded the table up to 40 characters, you'll have to add more if you have longer strings to filter.

SET CONCAT_NULL_YIELDS_NULL OFF;

with 
    ToBeScrubbed
as (
    select 1 as id, '*SOME 222@ !@* #* BOGUS !@*&! DATA' as ColumnToScrub
),

Scrubbed as (
    select 
        P.Number as ValueOrder,
        isnull ( substring ( t.ColumnToScrub , number , 1 ) , '' ) as ScrubbedValue,
        t.id
    from
        ToBeScrubbed t
        left join master..spt_values P
            on P.number between 1 and len(t.ColumnToScrub)
            and type ='P'
    where
        PatIndex('%[^a-z]%', substring(t.ColumnToScrub,P.number,1) ) = 0
)

SELECT
    id, 
    [1]+ [2]+ [3]+ [4]+ [5]+ [6]+ [7]+ [8] +[9] +[10]
    +  [11]+ [12]+ [13]+ [14]+ [15]+ [16]+ [17]+ [18] +[19] +[20]
    +  [21]+ [22]+ [23]+ [24]+ [25]+ [26]+ [27]+ [28] +[29] +[30]
    +  [31]+ [32]+ [33]+ [34]+ [35]+ [36]+ [37]+ [38] +[39] +[40] as ScrubbedData
FROM (
    select 
        *
    from 
        Scrubbed
    ) 
    src
    PIVOT (
        MAX(ScrubbedValue) FOR ValueOrder IN (
        [1], [2], [3], [4], [5], [6], [7], [8], [9], [10],
        [11], [12], [13], [14], [15], [16], [17], [18], [19], [20],
        [21], [22], [23], [24], [25], [26], [27], [28], [29], [30],
        [31], [32], [33], [34], [35], [36], [37], [38], [39], [40]
        )
    ) pvt
查看更多
只靠听说
3楼-- · 2018-12-31 04:45

--First create one function

CREATE FUNCTION [dbo].[GetNumericonly]
(@strAlphaNumeric VARCHAR(256))
RETURNS VARCHAR(256)
AS
BEGIN
     DECLARE @intAlpha INT
     SET @intAlpha = PATINDEX('%[^0-9]%', @strAlphaNumeric)
BEGIN
     WHILE @intAlpha > 0
   BEGIN
          SET @strAlphaNumeric = STUFF(@strAlphaNumeric, @intAlpha, 1, '' )
          SET @intAlpha = PATINDEX('%[^0-9]%', @strAlphaNumeric )
   END
END
RETURN ISNULL(@strAlphaNumeric,0)
END

Now call this function like

select [dbo].[GetNumericonly]('Abhi12shek23jaiswal')

Its result like

1223
查看更多
春风洒进眼中
4楼-- · 2018-12-31 04:46

Believe it or not, in my system this ugly function performs better than G Mastros elegant one.

CREATE FUNCTION dbo.RemoveSpecialChar (@s VARCHAR(256)) 
RETURNS VARCHAR(256) 
WITH SCHEMABINDING
    BEGIN
        IF @s IS NULL
            RETURN NULL
        DECLARE @s2 VARCHAR(256) = '',
                @l INT = LEN(@s),
                @p INT = 1

        WHILE @p <= @l
            BEGIN
                DECLARE @c INT
                SET @c = ASCII(SUBSTRING(@s, @p, 1))
                IF @c BETWEEN 48 AND 57
                   OR  @c BETWEEN 65 AND 90
                   OR  @c BETWEEN 97 AND 122
                    SET @s2 = @s2 + CHAR(@c)
                SET @p = @p + 1
            END

        IF LEN(@s2) = 0
            RETURN NULL

        RETURN @s2
查看更多
骚的不知所云
5楼-- · 2018-12-31 04:48

I put this in both places where PatIndex is called.

PatIndex('%[^A-Za-z0-9]%', @Temp)

for the custom function above RemoveNonAlphaCharacters and renamed it RemoveNonAlphaNumericCharacters

查看更多
人间绝色
6楼-- · 2018-12-31 04:50

I knew that SQL was bad at string manipulation, but I didn't think it would be this difficult. Here's a simple function to strip out all the numbers from a string. There would be better ways to do this, but this is a start.

CREATE FUNCTION dbo.AlphaOnly (
    @String varchar(100)
)
RETURNS varchar(100)
AS BEGIN
  RETURN (
    REPLACE(
      REPLACE(
        REPLACE(
          REPLACE(
            REPLACE(
              REPLACE(
                REPLACE(
                  REPLACE(
                    REPLACE(
                      REPLACE(
                        @String,
                      '9', ''),
                    '8', ''),
                  '7', ''),
                '6', ''),
              '5', ''),
            '4', ''),
          '3', ''),
        '2', ''),
      '1', ''),
    '0', '')
  )
END
GO

-- ==================
DECLARE @t TABLE (
    ColID       int,
    ColString   varchar(50)
)

INSERT INTO @t VALUES (1, 'abc1234567890')

SELECT ColID, ColString, dbo.AlphaOnly(ColString)
FROM @t

Output

ColID ColString
----- ------------- ---
    1 abc1234567890 abc

Round 2 - Data-Driven Blacklist

-- ============================================
-- Create a table of blacklist characters
-- ============================================
IF EXISTS (SELECT * FROM sys.tables WHERE [object_id] = OBJECT_ID('dbo.CharacterBlacklist'))
  DROP TABLE dbo.CharacterBlacklist
GO
CREATE TABLE dbo.CharacterBlacklist (
    CharID              int         IDENTITY,
    DisallowedCharacter nchar(1)    NOT NULL
)
GO
INSERT INTO dbo.CharacterBlacklist (DisallowedCharacter) VALUES (N'0')
INSERT INTO dbo.CharacterBlacklist (DisallowedCharacter) VALUES (N'1')
INSERT INTO dbo.CharacterBlacklist (DisallowedCharacter) VALUES (N'2')
INSERT INTO dbo.CharacterBlacklist (DisallowedCharacter) VALUES (N'3')
INSERT INTO dbo.CharacterBlacklist (DisallowedCharacter) VALUES (N'4')
INSERT INTO dbo.CharacterBlacklist (DisallowedCharacter) VALUES (N'5')
INSERT INTO dbo.CharacterBlacklist (DisallowedCharacter) VALUES (N'6')
INSERT INTO dbo.CharacterBlacklist (DisallowedCharacter) VALUES (N'7')
INSERT INTO dbo.CharacterBlacklist (DisallowedCharacter) VALUES (N'8')
INSERT INTO dbo.CharacterBlacklist (DisallowedCharacter) VALUES (N'9')
GO

-- ====================================
IF EXISTS (SELECT * FROM sys.objects WHERE [object_id] = OBJECT_ID('dbo.StripBlacklistCharacters'))
  DROP FUNCTION dbo.StripBlacklistCharacters
GO
CREATE FUNCTION dbo.StripBlacklistCharacters (
    @String nvarchar(100)
)
RETURNS varchar(100)
AS BEGIN
  DECLARE @blacklistCt  int
  DECLARE @ct           int
  DECLARE @c            nchar(1)

  SELECT @blacklistCt = COUNT(*) FROM dbo.CharacterBlacklist

  SET @ct = 0
  WHILE @ct < @blacklistCt BEGIN
    SET @ct = @ct + 1

    SELECT @String = REPLACE(@String, DisallowedCharacter, N'')
    FROM dbo.CharacterBlacklist
    WHERE CharID = @ct
  END

  RETURN (@String)
END
GO

-- ====================================
DECLARE @s  nvarchar(24)
SET @s = N'abc1234def5678ghi90jkl'

SELECT
    @s                  AS OriginalString,
    dbo.StripBlacklistCharacters(@s)   AS ResultString

Output

OriginalString           ResultString
------------------------ ------------
abc1234def5678ghi90jkl   abcdefghijkl

My challenge to readers: Can you make this more efficient? What about using recursion?

查看更多
浮光初槿花落
7楼-- · 2018-12-31 04:51

From performance perspective I'd use Inline Function:

SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE FUNCTION [dbo].[udf_RemoveNumericCharsFromString]
(
@List NVARCHAR(4000)
)
RETURNS TABLE 
AS RETURN

    WITH GetNums AS (
       SELECT TOP(ISNULL(DATALENGTH(@List), 0))
        n = ROW_NUMBER() OVER(ORDER BY (SELECT NULL))
        FROM
          (VALUES (0),(0),(0),(0)) d (n),
          (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) e (n),
          (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) f (n),
          (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) g (n)
            )

    SELECT StrOut = ''+
        (SELECT Chr
         FROM GetNums
            CROSS APPLY (SELECT SUBSTRING(@List , n,1)) X(Chr)
         WHERE Chr LIKE '%[^0-9]%' 
         ORDER BY N
         FOR XML PATH (''),TYPE).value('.','NVARCHAR(MAX)')


   /*How to Use
   SELECT StrOut FROM dbo.udf_RemoveNumericCharsFromString ('vv45--9gut')
   Result: vv--gut
   */
查看更多
登录 后发表回答