I am trying to do some feature selection using mutual_info_regression with SelectKBest wrapper. However I keep running into an error indicating that my list of features needs to be reshaped into a 2D array, not quite sure why I keep getting this message-
#feature selection before linear regression benchmark test
import sklearn
from sklearn.feature_selection import mutual_info_regression, SelectKBest
features = list(housing_data[housing_data.columns.difference(['sale_price'])])
target = 'sale_price'
new = SelectKBest(mutual_info_regression, k=20).fit_transform(features, target)
This is my traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-18-8c778124066c> in <module>()
3 features = list(housing_data[housing_data.columns.difference(['sale_price'])])
4 target = 'sale_price'
----> 5 new = SelectKBest(mutual_info_regression, k=20).fit_transform(features, target)
/usr/local/lib/python3.6/dist-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
463 else:
464 # fit method of arity 2 (supervised transformation)
--> 465 return self.fit(X, y, **fit_params).transform(X)
466
467
/usr/local/lib/python3.6/dist-packages/sklearn/feature_selection/univariate_selection.py in fit(self, X, y)
339 self : object
340 """
--> 341 X, y = check_X_y(X, y, ['csr', 'csc'], multi_output=True)
342
343 if not callable(self.score_func):
/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
754 ensure_min_features=ensure_min_features,
755 warn_on_dtype=warn_on_dtype,
--> 756 estimator=estimator)
757 if multi_output:
758 y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,
/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
550 "Reshape your data either using array.reshape(-1, 1) if "
551 "your data has a single feature or array.reshape(1, -1) "
--> 552 "if it contains a single sample.".format(array))
553
554 # in the future np.flexible dtypes will be handled like object dtypes
ValueError: Expected 2D array, got 1D array instead:
array=['APPBBL' 'APPDate' 'Address' 'AreaSource' 'AssessLand' 'AssessTot' 'BBL'
'BldgArea' 'BldgClass' 'BldgDepth' 'BldgFront' 'BoroCode' 'Borough'
'BsmtCode' 'BuiltFAR' 'CB2010' 'CD' 'CT2010' 'ComArea' 'CommFAR'
'CondoNo' 'Council' 'EDesigNum' 'Easements' 'ExemptLand' 'ExemptTot'
'Ext' 'FIRM07_FLA' 'FacilFAR' 'FactryArea' 'FireComp' 'GarageArea'
'HealthArea' 'HealthCent' 'HistDist' 'IrrLotCode' 'LandUse' 'Landmark'
'LotArea' 'LotDepth' 'LotFront' 'LotType' 'LtdHeight' 'MAPPLUTO_F'
'NumBldgs' 'NumFloors' 'OfficeArea' 'OtherArea' 'Overlay1' 'Overlay2'
'OwnerName' 'OwnerType' 'PFIRM15_FL' 'PLUTOMapID' 'PolicePrct' 'ProxCode'
'ResArea' 'ResidFAR' 'RetailArea' 'SHAPE_Area' 'SHAPE_Leng' 'SPDist1'
'SPDist2' 'SPDist3' 'Sanborn' 'SanitBoro' 'SanitDistr' 'SanitSub'
'SchoolDist' 'SplitZone' 'StrgeArea' 'TaxMap' 'Tract2010' 'UnitsRes'
'UnitsTotal' 'Unnamed: 0' 'Version' 'XCoord' 'YCoord' 'YearAlter1'
'YearAlter2' 'YearBuilt' 'ZMCode' 'ZipCode' 'ZoneDist1' 'ZoneDist2'
'ZoneDist3' 'ZoneDist4' 'ZoneMap' 'address' 'apartment_number' 'block'
'borough' 'building_class' 'building_class_at_sale'
'building_class_category' 'commercial_units' 'easement' 'gross_sqft'
'land_sqft' 'lot' 'neighborhood' 'price_range' 'residential_units'
'sale_date' 'tax_class' 'tax_class_at_sale' 'total_units' 'year_built'
'year_of_sale' 'zip_code'].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
Here is a sample of my data:
housing_data = pd.DataFrame({'Unnamed: 0': {0: 1, 1: 2, 2: 3, 3: 4}, 'borough': {0: 3, 1: 3, 2: 3, 3: 3}, 'neighborhood': {0: 'DOWNTOWN-METROTECH', 1: 'DOWNTOWN-FULTON FERRY', 2: 'BROOKLYN HEIGHTS', 3: 'MILL BASIN'}, 'building_class_category': {0: '28 COMMERCIAL CONDOS', 1: '29 COMMERCIAL GARAGES', 2: '21 OFFICE BUILDINGS', 3: '22 STORE BUILDINGS'}, 'tax_class': {0: '4', 1: '4', 2: '4', 3: '4'}, 'block': {0: 140, 1: 54, 2: 204, 3: 8470}, 'lot': {0: 1001, 1: 1, 2: 1, 3: 55}, 'easement': {0: nan, 1: nan, 2: nan, 3: nan}, 'building_class': {0: 'R5', 1: 'G7', 2: 'O6', 3: 'K6'}, 'address': {0: '330 JAY STREET', 1: '85 JAY STREET', 2: '29 COLUMBIA HEIGHTS', 3: '5120 AVENUE U'}, 'apartment_number': {0: 'COURT', 1: nan, 2: nan, 3: nan}, 'zip_code': {0: 11201, 1: 11201, 2: 11201, 3: 11234}, 'residential_units': {0: 0, 1: 0, 2: 0, 3: 0}, 'commercial_units': {0: 1, 1: 0, 2: 0, 3: 123}, 'total_units': {0: 1, 1: 0, 2: 0, 3: 123}, 'land_sqft': {0: 0.0, 1: 134988.0, 2: 32000.0, 3: 905000.0}, 'gross_sqft': {0: 0.0, 1: 0.0, 2: 304650.0, 3: 2548000.0}, 'year_built': {0: 2002, 1: 0, 2: 1924, 3: 1970}, 'tax_class_at_sale': {0: 4, 1: 4, 2: 4, 3: 4}, 'building_class_at_sale': {0: 'R5', 1: 'G7', 2: 'O6', 3: 'K6'}, 'sale_price': {0: 499401179.0, 1: 345000000.0, 2: 340000000.0, 3: 276947000.0}, 'sale_date': {0: '2008-04-23', 1: '2016-12-20', 2: '2016-08-03', 3: '2012-11-28'}, 'year_of_sale': {0: 2008, 1: 2016, 2: 2016, 3: 2012}, 'Borough': {0: nan, 1: 'BK', 2: 'BK', 3: 'BK'}, 'CD': {0: nan, 1: 302.0, 2: 302.0, 3: 318.0}, 'CT2010': {0: nan, 1: 21.0, 2: 1.0, 3: 698.0}, 'CB2010': {0: nan, 1: 3017.0, 2: 1003.0, 3: 2005.0}, 'SchoolDist': {0: nan, 1: 13.0, 2: 13.0, 3: 22.0}, 'Council': {0: nan, 1: 33.0, 2: 33.0, 3: 46.0}, 'ZipCode': {0: nan, 1: 11201.0, 2: 11201.0, 3: 11234.0}, 'FireComp': {0: nan, 1: 'L118', 2: 'E205', 3: 'E323'}, 'PolicePrct': {0: nan, 1: 84.0, 2: 84.0, 3: 63.0}, 'HealthCent': {0: nan, 1: 36.0, 2: 38.0, 3: 35.0}, 'HealthArea': {0: nan, 1: 1000.0, 2: 2300.0, 3: 8822.0}, 'SanitBoro': {0: nan, 1: 3.0, 2: 3.0, 3: 3.0}, 'SanitDistr': {0: nan, 1: 2.0, 2: 2.0, 3: 18.0}, 'SanitSub': {0: nan, 1: '1B', 2: '1A', 3: '4E'}, 'Address': {0: nan, 1: '87 JAY STREET', 2: '29 COLUMBIA HEIGHTS', 3: '5120 AVENUE U'}, 'ZoneDist1': {0: nan, 1: 'M1-2/R8', 2: 'M2-1', 3: 'M3-1'}, 'ZoneDist2': {0: nan, 1: nan, 2: nan, 3: nan}, 'ZoneDist3': {0: nan, 1: nan, 2: nan, 3: nan}, 'ZoneDist4': {0: nan, 1: nan, 2: nan, 3: nan}, 'Overlay1': {0: nan, 1: nan, 2: nan, 3: nan}, 'Overlay2': {0: nan, 1: nan, 2: nan, 3: nan}, 'SPDist1': {0: nan, 1: 'MX-2', 2: nan, 3: nan}, 'SPDist2': {0: nan, 1: nan, 2: nan, 3: nan}, 'SPDist3': {0: nan, 1: nan, 2: nan, 3: nan}, 'LtdHeight': {0: nan, 1: nan, 2: nan, 3: nan}, 'SplitZone': {0: nan, 1: 'N', 2: 'N', 3: 'N'}, 'BldgClass': {0: nan, 1: 'G7', 2: 'O6', 3: 'K6'}, 'LandUse': {0: nan, 1: 10.0, 2: 5.0, 3: 5.0}, 'Easements': {0: nan, 1: 0.0, 2: 0.0, 3: 1.0}, 'OwnerType': {0: nan, 1: 'P', 2: nan, 3: nan}, 'OwnerName': {0: nan, 1: '85 JAY STREET BROOKLY', 2: '25-30 COLUMBIA HEIGHT', 3: 'BROOKLYN KINGS PLAZA'}, 'LotArea': {0: nan, 1: 134988.0, 2: 32000.0, 3: 905000.0}, 'BldgArea': {0: nan, 1: 0.0, 2: 304650.0, 3: 2548000.0}, 'ComArea': {0: nan, 1: 0.0, 2: 304650.0, 3: 2548000.0}, 'ResArea': {0: nan, 1: 0.0, 2: 0.0, 3: 0.0}, 'OfficeArea': {0: nan, 1: 0.0, 2: 264750.0, 3: 0.0}, 'RetailArea': {0: nan, 1: 0.0, 2: 0.0, 3: 1263000.0}, 'GarageArea': {0: nan, 1: 0.0, 2: 0.0, 3: 1285000.0}, 'StrgeArea': {0: nan, 1: 0.0, 2: 0.0, 3: 0.0}, 'FactryArea': {0: nan, 1: 0.0, 2: 0.0, 3: 0.0}, 'OtherArea': {0: nan, 1: 0.0, 2: 39900.0, 3: 0.0}, 'AreaSource': {0: nan, 1: 7.0, 2: 2.0, 3: 2.0}, 'NumBldgs': {0: nan, 1: 0.0, 2: 1.0, 3: 4.0}, 'NumFloors': {0: nan, 1: 0.0, 2: 13.0, 3: 2.0}, 'UnitsRes': {0: nan, 1: 0.0, 2: 0.0, 3: 0.0}, 'UnitsTotal': {0: nan, 1: 0.0, 2: 0.0, 3: 123.0}, 'LotFront': {0: nan, 1: 490.5, 2: 92.42, 3: 930.0}, 'LotDepth': {0: nan, 1: 275.33, 2: 335.92, 3: 859.0}, 'BldgFront': {0: nan, 1: 0.0, 2: 335.0, 3: 0.0}, 'BldgDepth': {0: nan, 1: 0.0, 2: 92.0, 3: 0.0}, 'Ext': {0: nan, 1: nan, 2: nan, 3: nan}, 'ProxCode': {0: nan, 1: 0.0, 2: 0.0, 3: 0.0}, 'IrrLotCode': {0: nan, 1: 'N', 2: 'Y', 3: 'Y'}, 'LotType': {0: nan, 1: 5.0, 2: 3.0, 3: 3.0}, 'BsmtCode': {0: nan, 1: 5.0, 2: 5.0, 3: 5.0}, 'AssessLand': {0: nan, 1: 1571850.0, 2: 1548000.0, 3: 36532350.0}, 'AssessTot': {0: nan, 1: 1571850.0, 2: 25463250.0, 3: 149792400.0}, 'ExemptLand': {0: nan, 1: 1571850.0, 2: 0.0, 3: 0.0}, 'ExemptTot': {0: nan, 1: 1571850.0, 2: 0.0, 3: 0.0}, 'YearBuilt': {0: nan, 1: 0.0, 2: 1924.0, 3: 1970.0}, 'YearAlter1': {0: nan, 1: 0.0, 2: 1980.0, 3: 0.0}, 'YearAlter2': {0: nan, 1: 0.0, 2: 0.0, 3: 0.0}, 'HistDist': {0: nan, 1: nan, 2: nan, 3: nan}, 'Landmark': {0: nan, 1: nan, 2: nan, 3: nan}, 'BuiltFAR': {0: nan, 1: 0.0, 2: 9.52, 3: 2.82}, 'ResidFAR': {0: nan, 1: 7.2, 2: 0.0, 3: 0.0}, 'CommFAR': {0: nan, 1: 2.0, 2: 2.0, 3: 2.0}, 'FacilFAR': {0: nan, 1: 6.5, 2: 0.0, 3: 0.0}, 'BoroCode': {0: nan, 1: 3.0, 2: 3.0, 3: 3.0}, 'BBL': {0: nan, 1: 3000540001.0, 2: 3002040001.0, 3: 3084700055.0}, 'CondoNo': {0: nan, 1: 0.0, 2: 0.0, 3: 0.0}, 'Tract2010': {0: nan, 1: 21.0, 2: 1.0, 3: 698.0}, 'XCoord': {0: nan, 1: 988208.0, 2: 985952.0, 3: 1006597.0}, 'YCoord': {0: nan, 1: 195011.0, 2: 195007.0, 3: 161424.0}, 'ZoneMap': {0: nan, 1: '12d', 2: '12d', 3: '23b'}, 'ZMCode': {0: nan, 1: nan, 2: nan, 3: nan}, 'Sanborn': {0: nan, 1: '302 016', 2: '302 004', 3: '319 077'}, 'TaxMap': {0: nan, 1: 30101.0, 2: 30106.0, 3: 32502.0}, 'EDesigNum': {0: nan, 1: nan, 2: nan, 3: nan}, 'APPBBL': {0: nan, 1: 3000540001.0, 2: 0.0, 3: 0.0}, 'APPDate': {0: nan, 1: '12/06/2002', 2: nan, 3: nan}, 'PLUTOMapID': {0: nan, 1: 1.0, 2: 1.0, 3: 1.0}, 'FIRM07_FLA': {0: nan, 1: nan, 2: nan, 3: 1.0}, 'PFIRM15_FL': {0: nan, 1: nan, 2: nan, 3: 1.0}, 'Version': {0: nan, 1: '17V1.1', 2: '17V1.1', 3: '17V1.1'}, 'MAPPLUTO_F': {0: nan, 1: 0.0, 2: 0.0, 3: 0.0}, 'SHAPE_Leng': {0: nan, 1: 1559.88914353, 2: 890.718521021, 3: 3729.78685686}, 'SHAPE_Area': {0: nan, 1: 140131.577176, 2: 34656.4472405, 3: 797554.847834}, 'price_range': {0: nan, 1: nan, 2: nan, 3: nan}})