Transforming levels of one variable into other var

2019-08-17 12:41发布

问题:

I have a dataset that looks something like this:

IDnum    State    Product    Consumption
123      MI       A          30
123      MI       B          20
123      MI       C          45
456      NJ       A          15
456      NJ       D          10
789      MI       B          60
...      ...      ...        ...

And i would like to create a new dataset, where i have one row for each IDnum, and a new dummy variable for each different product (in my real dataset i have close to 1000 products), along with it's associated consumption. It would look like something in these lines

IDnum   State   Prod.A   Cons.A   Prod.B   Cons.B   Prod.C   Cons.C   Prod.D   Cons.D
123     MI      yes      30       yes      20       yes      45       no       -
456     NJ      yes      15       no       -        no       -        yes      10
789     MI      no       -        yes      60       no       -        no       -
...     ...     ...      ...      ...      ...      ...      ...      ...      ...

Some variables like "State" doesn't change within the same IDnum, but each row in the original bank are equivalent to one purchase, hence the change in the "product" and "consumption" variables for the same IDnum. I would like that my new dataset showed all the consumption habits of each costumer in one single row, but so far i have failed.

Any help would be greatly apreciated.

回答1:

Without yes/no variables, it's really easy:

data input;
length State $2 Product $1;
input IDnum    State    Product    Consumption;
cards;
123      MI       A          30
123      MI       B          20
123      MI       C          45
456      NJ       A          15
456      NJ       D          10
789      MI       B          60
;
run;

proc transpose data=input out=output(drop=_NAME_) prefix=Cons_;
  var Consumption;
  id Product;
  by IDnum State;
run;

Adding the yes/no fields:

    proc sql;/* from column names or alternatively 
                create it from source data directly if not taking too long */
    create table work.products as
        select scan(name, 2, '_') as product length=1
        from dictionary.columns 
                    where libname='WORK' and memname='OUTPUT' 
                      and  upcase(name) like 'CONS_%';
    quit;

    filename vars temp;/* write a temp file containing variable definitions
                       in desired order */
    data _null_;
        set  work.products end=last;
        file vars;
                    length str $40;
        if _N_ = 1 then put 'LENGTH ';
        str = catt('Prod_', product, ' $3');
        put str;
        str = catt('Cons_', product, ' 8');
        put str;
        if last then put ';';
    run;

    options source2;
    data output2;
        length IdNum 8 State $2;
        %include vars;
        set output;
        array prod{*} Prod_:;
        array cons{*} Cons_:;
        drop i;
        do i=1 to dim(prod);
            if coalesce(cons(i), 0) ne 0 then prod(i)='yes';
            else prod(i)='no';
        end;
    run;


标签: sas