SQL query to count frequency

2019-04-12 06:59发布

I've got a table which has entries like

id     keywords
1      cat, dog, man, mouse
2      man, pen, pencil, eraser
3      dog, man, friends
4      dog, leash,......

I want to make a table something like

id  cat  dog  man  mouse  pen  pencil  eraser  friends  leash ......
1   1    1    1    1      0    0       0       0        0
2   0    0    1    0      1    1       1       0        0
3   0    1    1    0      0    0       0       1        0

and so on.

4条回答
女痞
2楼-- · 2019-04-12 07:10
WITH basedata(id,keywords) AS
(
SELECT 1,'cat, dog, man, mouse' union all
SELECT 2 ,'man, pen, pencil, eraser' union all
SELECT 3,'dog, man, friends' union all
SELECT 4,'dog, leash'
),
cte(id, t, x)
     AS (SELECT *,
                CAST('<foo>' + REPLACE(keywords,',','</foo><foo>') + '</foo>' AS XML)
         FROM   basedata)   
SELECT id,
       LTRIM(RTRIM(w.value('.', 'nvarchar(max)'))) as keyword
INTO #Split       
FROM   cte
       CROSS APPLY x.nodes('//foo') as word(w) 


DECLARE @ColList nvarchar(max)

SELECT @ColList = ISNULL(@ColList + ',','') + keyword 
FROM (
SELECT DISTINCT QUOTENAME(keyword) AS keyword
FROM #Split
) T

EXEC(N'       
SELECT *
FROM #Split
PIVOT (COUNT(keyword) FOR keyword IN (' + @ColList + N')) P')

DROP TABLE #Split

Gives

id          cat         dog         eraser      friends     leash       man         mouse       pen         pencil
----------- ----------- ----------- ----------- ----------- ----------- ----------- ----------- ----------- -----------
1           1           1           0           0           0           1           1           0           0
2           0           0           1           0           0           1           0           1           1
3           0           1           0           1           0           1           0           0           0
4           0           1           0           0           1           0           0           0           0
查看更多
冷血范
3楼-- · 2019-04-12 07:10
 SELECT id, 
 CAST(CASE WHEN Charindex('dog', keywords) > 0 THEN 1 ELSE 0 END AS bit) as DOG,
 CAST(CASE WHEN Charindex('cat', keywords) > 0 THEN 1 ELSE 0 END AS bit) as CAT,
 ...
 FROM yourtable
查看更多
再贱就再见
4楼-- · 2019-04-12 07:15

Must you use the pivot form? And is your end result the frequency per id - which seems strange? Otherwise the cells always contain 1 as frequency.

See if this works for you.

Sample data

create table basedata(id int,keywords varchar(max));
insert basedata
SELECT 1,'cat, dog, man, mouse' union all
SELECT 2 ,'man, pen, pencil, eraser' union all
SELECT 3,'dog, man, friends' union all
SELECT 4,'dog, leash'

Query

;with cte(id, list, word) as (
select id,
    cast(STUFF(keywords,1,CHARINDEX(',',keywords+','),'') as varchar(max)),
    cast(ltrim(rtrim(LEFT(keywords,CHARINDEX(',',keywords+',')-1))) as varchar(max))
from basedata
where keywords > ''
union all
select id,
    STUFF(list,1,CHARINDEX(',',list+','),''),
    ltrim(rtrim(LEFT(list,CHARINDEX(',',list+',')-1)))
from cte
where list > ''
)
select word, COUNT(*) frequency
from cte
group by word

Output

word       frequency
---------- -----------
cat        1
dog        3
eraser     1
friends    1
leash      1
man        3
mouse      1
pen        1
pencil     1
查看更多
疯言疯语
5楼-- · 2019-04-12 07:15

If you are using SQL Server 2008, you can use the full-text parser to split your string:

Declare @Inputs Table ( Id int not null Primary Key, Keywords nvarchar(max ) )
Insert @Inputs( Id, Keywords ) Values( 1, 'cat, dog, man, mouse' )
Insert @Inputs( Id, Keywords ) Values( 2, 'man, pen, pencil, eraser' )
Insert @Inputs( Id, Keywords ) Values( 3, 'dog, man, friends' )
Insert @Inputs( Id, Keywords ) Values( 4, 'dog, leash' )

Declare @LCID int
Declare @StopListId int
Declare @AccentSensitive int

Set @LCID = Cast( DatabasePropertyEx('master','LCID') As int )
Set @StopListId = 0
Set @AccentSensitive = 1

Select S.display_term, Count(*) As Frequency
From @Inputs As I
    Cross Apply (
                Select display_term
                From sys.dm_fts_parser( QUOTENAME( I.Keywords, '"')
                    , @LCID, @StopListId, @AccentSensitive)
                ) As S
Group By S.display_term

However, if you are not using SQL Server 2008, then you need a split function. I've presented on at the end of this post. Then your query is simply:

Select LTrim(RTrim(S.Value)), Count(*) As Frequency
From @Inputs As I
    Cross Apply dbo.Split( I.Keywords, ',' ) As S
Group By LTrim(RTrim(S.Value))

And the split function:

Create Function [dbo].[Split]
(   
    @DelimitedList nvarchar(max)
    , @Delimiter varchar(2) = ','
)
RETURNS TABLE 
AS
RETURN 
    (
    With CorrectedList As
        (
        Select Case When Left(@DelimitedList, DataLength(@Delimiter)) <> @Delimiter Then @Delimiter Else '' End
            + @DelimitedList
            + Case When Right(@DelimitedList, DataLength(@Delimiter)) <> @Delimiter Then @Delimiter Else '' End
            As List
            , DataLength(@Delimiter) As DelimiterLen
        )
        , Numbers As 
        (
        Select TOP (Coalesce(Len(@DelimitedList),1)) Row_Number() Over ( Order By c1.object_id ) As Value
        From sys.objects As c1
            Cross Join sys.columns As c2
        )
    Select CharIndex(@Delimiter, CL.list, N.Value) + CL.DelimiterLen As Position
        , Substring (
                    CL.List
                    , CharIndex(@Delimiter, CL.list, N.Value) + CL.DelimiterLen     
                    , CharIndex(@Delimiter, CL.list, N.Value + 1)                           
                        - ( CharIndex(@Delimiter, CL.list, N.Value) + CL.DelimiterLen ) 
                    ) As Value
    From CorrectedList As CL
        Cross Join Numbers As N
    Where N.Value < Len(CL.List)
        And Substring(CL.List, N.Value, CL.DelimiterLen) = @Delimiter
    )
查看更多
登录 后发表回答